{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from sklearn.preprocessing import LabelEncoder, OneHotEncoder\n",
    "from sklearn.model_selection import GridSearchCV, train_test_split\n",
    "from sklearn.model_selection import cross_val_score\n",
    "from sklearn.linear_model import Ridge, Lasso, ElasticNet\n",
    "from sklearn.ensemble import RandomForestRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the training and test sets.\n",
    "# The directory defaults to the original local path; set the\n",
    "# HOUSE_PRICES_DATA_DIR environment variable to read the CSVs from elsewhere.\n",
    "DATA_DIR = os.environ.get(\"HOUSE_PRICES_DATA_DIR\", \"C:/Users/Lenovo/Documents\")\n",
    "df_train = pd.read_csv(os.path.join(DATA_DIR, \"train.csv\"))\n",
    "df_test = pd.read_csv(os.path.join(DATA_DIR, \"test.csv\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1460, 81)"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1459, 80)"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>...</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>65.0</td>\n",
       "      <td>8450</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>208500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>80.0</td>\n",
       "      <td>9600</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>181500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>68.0</td>\n",
       "      <td>11250</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>223500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>70</td>\n",
       "      <td>RL</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9550</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Abnorml</td>\n",
       "      <td>140000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>84.0</td>\n",
       "      <td>14260</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>250000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 81 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0   1          60       RL         65.0     8450   Pave   NaN      Reg   \n",
       "1   2          20       RL         80.0     9600   Pave   NaN      Reg   \n",
       "2   3          60       RL         68.0    11250   Pave   NaN      IR1   \n",
       "3   4          70       RL         60.0     9550   Pave   NaN      IR1   \n",
       "4   5          60       RL         84.0    14260   Pave   NaN      IR1   \n",
       "\n",
       "  LandContour Utilities    ...     PoolArea PoolQC Fence MiscFeature MiscVal  \\\n",
       "0         Lvl    AllPub    ...            0    NaN   NaN         NaN       0   \n",
       "1         Lvl    AllPub    ...            0    NaN   NaN         NaN       0   \n",
       "2         Lvl    AllPub    ...            0    NaN   NaN         NaN       0   \n",
       "3         Lvl    AllPub    ...            0    NaN   NaN         NaN       0   \n",
       "4         Lvl    AllPub    ...            0    NaN   NaN         NaN       0   \n",
       "\n",
       "  MoSold YrSold  SaleType  SaleCondition  SalePrice  \n",
       "0      2   2008        WD         Normal     208500  \n",
       "1      5   2007        WD         Normal     181500  \n",
       "2      9   2008        WD         Normal     223500  \n",
       "3      2   2006        WD        Abnorml     140000  \n",
       "4     12   2008        WD         Normal     250000  \n",
       "\n",
       "[5 rows x 81 columns]"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>...</th>\n",
       "      <th>ScreenPorch</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1461</td>\n",
       "      <td>20</td>\n",
       "      <td>RH</td>\n",
       "      <td>80.0</td>\n",
       "      <td>11622</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>120</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1462</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>81.0</td>\n",
       "      <td>14267</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Gar2</td>\n",
       "      <td>12500</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1463</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>74.0</td>\n",
       "      <td>13830</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1464</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>78.0</td>\n",
       "      <td>9978</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1465</td>\n",
       "      <td>120</td>\n",
       "      <td>RL</td>\n",
       "      <td>43.0</td>\n",
       "      <td>5005</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>HLS</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>144</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 80 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0  1461          20       RH         80.0    11622   Pave   NaN      Reg   \n",
       "1  1462          20       RL         81.0    14267   Pave   NaN      IR1   \n",
       "2  1463          60       RL         74.0    13830   Pave   NaN      IR1   \n",
       "3  1464          60       RL         78.0     9978   Pave   NaN      IR1   \n",
       "4  1465         120       RL         43.0     5005   Pave   NaN      IR1   \n",
       "\n",
       "  LandContour Utilities      ...       ScreenPorch PoolArea PoolQC  Fence  \\\n",
       "0         Lvl    AllPub      ...               120        0    NaN  MnPrv   \n",
       "1         Lvl    AllPub      ...                 0        0    NaN    NaN   \n",
       "2         Lvl    AllPub      ...                 0        0    NaN  MnPrv   \n",
       "3         Lvl    AllPub      ...                 0        0    NaN    NaN   \n",
       "4         HLS    AllPub      ...               144        0    NaN    NaN   \n",
       "\n",
       "  MiscFeature MiscVal MoSold  YrSold  SaleType  SaleCondition  \n",
       "0         NaN       0      6    2010        WD         Normal  \n",
       "1        Gar2   12500      6    2010        WD         Normal  \n",
       "2         NaN       0      3    2010        WD         Normal  \n",
       "3         NaN       0      6    2010        WD         Normal  \n",
       "4         NaN       0      1    2010        WD         Normal  \n",
       "\n",
       "[5 rows x 80 columns]"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count      1460.000000\n",
       "mean     180921.195890\n",
       "std       79442.502883\n",
       "min       34900.000000\n",
       "25%      129975.000000\n",
       "50%      163000.000000\n",
       "75%      214000.000000\n",
       "max      755000.000000\n",
       "Name: SalePrice, dtype: float64"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect the rough distribution of the target label SalePrice\n",
    "df_train['SalePrice'].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\scipy\\stats\\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.\n",
      "  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x289498b39e8>"
      ]
     },
     "execution_count": 126,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZYAAAEKCAYAAAAxXHOuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xl8XNWV6PvfqirNs2XJgzxj2SATYkCxmQIEOmDSnZjchhtDBpKQuHMb7u1c7gB0uvM6vOTddm6/uF8nkIQOSZN0iHHTnYuTJiEDQ0IAgwwGLBtjWTayLA+SrcGaVdJ6f5wtU5SrVCW5pJrW9/PRR1W79tl71aBa2ufss4+oKsYYY0yi+JIdgDHGmMxiicUYY0xCWWIxxhiTUJZYjDHGJJQlFmOMMQllicUYY0xCWWIxxhiTUJZYjDHGJJQlFmOMMQkVSHYAyTB79mxdsmRJssMwxpi0smPHjg5VrYpVLysTy5IlS2hoaEh2GMYYk1ZE5O146tmuMGOMMQllicUYY0xCWWIxxhiTUJZYjDHGJJQlFmOMMQllicUYY0xCWWIxxhiTUJZYjDHGJJQlFmOMMQmVlWfeG88j21vOKLt17aIkRGKMySQ2YjHGGJNQcSUWEVknIntFpElE7onweJ6IPOoe3y4iS0Ieu9eV7xWR62O1KSJLXRv7XJu5E/UhIjki8rCIvCEie0Tk3qm+GMYYY85ezMQiIn7gfuAGoA64RUTqwqrdDnSq6nJgM7DJbVsHbABWAeuAB0TEH6PNTcBmVa0FOl3bUfsAbgbyVPU9wMXAn4UmNmOMMTMrnhHLGqBJVZtVdRjYAqwPq7MeeNjdfgy4VkTElW9R1SFVPQA0ufYitum2uca1gWvzxhh9KFAkIgGgABgGeuJ+BYwxxiRUPImlBjgUcr/VlUWso6pBoBuonGDbaOWVQJdrI7yvaH08BvQBR4AW4O9U9WQcz8sYY8w0iCexSIQyjbNOoson6mMNMArMB5YC/01EloVXFJGNItIgIg3t7e0RmjLGGJMI8Uw3bgUWhtxfALRFqdPqdkmVASdjbBupvAMoF5GAG5WE1o/Wx63AL1V1BDguIn8A6oHm0ABV9UHgQYD6+vrwxGicSFOQwaYhG2PiF8+I5WWg1s3WysU7GL8trM424DZ3+ybgKVVVV77BzehaCtQCL0Vr023ztGsD1+bjMfpoAa4RTxFwCfBm/C+BMcaYRIo5YlHVoIjcCTwJ+IHvq2qjiNwHNKjqNuAh4Eci0oQ3itjgtm0Uka3AbiAI3KGqowCR2nRd3g1sEZGvAq+6tonWB97ssh8Au/B2l/1AVV+f8itijDHmrIj3T392qa+vV7vmffTdXpHYrjBjjIjsUNX6WPXszHtjjDEJZYnFGGNMQlliMcYYk1CWWIwxxiSUJRZjjDEJZYnFGGNMQlliMcYYk1CWWIwxxiSUJRZjjDEJZde8NwCoKjve7qRveJSAT1g0q5CFswqTHZYxJg1ZYjEAtHYO8G+vHj59vzDXz93rziXHb4NaY8zk2LeGAeD11i78PuGeG87ltkuX0D88yuutXckOyxiThiyxGMZUeeNwNyuqiynNz2HFnGLmlObxwv4TZOMipcaYs2OJxfD2iX56BoNcsKAcABHhsmWzaese5O0T/UmOzhiTbiyxGF5v7SLHL5w7r+R02XsXllOQ4+f55hNJjMwYk44ssWS50TFl1+FuVs4tJS/gP12eG/DxviUV7G7rpntgJIkRGmPSTVyJRUTWicheEWkSkXsiPJ4nIo+6x7eLyJKQx+515XtF5PpYbbrLFW8XkX2uzdyJ+hCRj4vIzpCfMRFZPdUXJNsc6Oijb3iUC2rKznjsosUVjCm8ebQnCZEZY9JVzMQiIn68y//eANQBt4hIXVi124FOVV0ObAY2uW3r8C4hvApYBzwgIv4YbW4CNqtqLdDp2o7ah6r+WFVXq+pq
4JPAQVXdOfmXIjvtO34Kv09YObfkjMeqivOoKMzhraOnkhCZMSZdxTNiWQM0qWqzqg4DW4D1YXXWAw+7248B14qIuPItqjqkqgeAJtdexDbdNte4NnBt3hijj1C3AD+J4zkZ51jPINUleRHPVxERVswpYX97H0PB0SREZ4xJR/EklhrgUMj9VlcWsY6qBoFuoHKCbaOVVwJdro3wvqL1EepjWGKZlKPdg8wpzY/6+Io5JQyPjvHygc4ZjMoYk87iSSzhowKA8JMbotVJVHnMOERkLdCvqrsi1ENENopIg4g0tLe3R6qSdQaGR+kZDDJ3gsRyTlUxfp/wzN7jMxiZMSadxZNYWoGFIfcXAG3R6ohIACgDTk6wbbTyDqDctRHeV7Q+xm1ggtGKqj6oqvWqWl9VVTXB080eR3sGASYcseQGfCydXcQzb1kyNsbEJ57E8jJQ62Zr5eJ9gW8Lq7MNuM3dvgl4Sr1TtrcBG9yMrqVALfBStDbdNk+7NnBtPh6jD0TEB9yMd6zGxGk8scwti55YwNsd1nS8l9ZOO1nSGBNbzMTijmfcCTwJ7AG2qmqjiNwnIh9x1R4CKkWkCbgLuMdt2whsBXYDvwTuUNXRaG26tu4G7nJtVbq2o/bhXAm0qmrzVF6EbHWse5D8HB+l+ROvRbpiTjEAz+y1UYsxJjbJxrWg6uvrtaGhIdlhJN2VX38an8DGK8+ZsJ6q8u1n93Pu3FK+d1v9DEVnjEk1IrJDVWN+CdiZ91lKVTnWM/GMsHEiwtUrq3h+f4dNOzbGxGTXY8lSh7sGGAqOxTy+Mu4DK6v55xdbePlAJ1fUzgbgke0tEeveunZRwuI0xqQfG7Fkqb3ubPqJphqHuvScSnL9Ppt2bIyJyRJLlnrTJZZ4doUBFOYGWLtslk07NsbEZIklS+09eoryghzyc/yxKztXraiyacfGmJgssWSpvUdPxT1aGXf1ymrAph0bYyZmiSULqSoHT/RRVZI3qe3OqSpiQUWBHWcxxkzIEksWaj81xFBwjFlFuZPaTkT4wMpq/tB0gsERm3ZsjInMEksWajnpHSOpKJxcYgG49rxqBkZG+UNTR6LDMsZkCEssWWg8sUx2xAJw2TmzKckL8GTj0USHZYzJEJZYstChkwMAlBfmTHrb3ICPD5xbzW/2HGcsC5cDMsbEZoklC7Wc7GduaX7Eq0bG4/pVcznZN8zbJ2zasTHmTJZYstChk/0smlU45e2vWllFbsDH7rbuBEZljMkUlliy0KHOfhbMKpjy9sV5Aa5YPpvGIz1k4+rYxpiJWWLJMoMjoxztGTyrEQvA9avm0NU/wpHuwQRFZozJFJZYsszhrgFUOevE8kfnzUGA3Ud6EhOYMSZjWGLJMofcVOOFZ5lYKovzWFxZxO42SyzGmHeLK7GIyDoR2SsiTSJyT4TH80TkUff4dhFZEvLYva58r4hcH6tNEVnq2tjn2syNo48LROQFEWkUkTdEZHKLYGWR8cRytiMWgFXzSznaM8iJ3qGzbssYkzliJhYR8QP3AzcAdcAtIlIXVu12oFNVlwObgU1u2zpgA7AKWAc8ICL+GG1uAjarai3Q6dqeqI8A8M/AF1R1FXA1MDLJ1yFrHOocIC/go6p4cuuERVI3rxSw3WHGmHeLZ8SyBmhS1WZVHQa2AOvD6qwHHna3HwOuFRFx5VtUdUhVDwBNrr2IbbptrnFt4Nq8MUYf1wGvq+prAKp6QlVtIasoWk70s6CiAJ9PzrqtiqJc5pXl2+4wY8y7xHNp4hrgUMj9VmBttDqqGhSRbqDSlb8Ytm2Nux2pzUqgS1WDEepH62MFoCLyJFCFl8i+Hv4kRGQjsBFg0aLsvXRuyxTPYYl2GeK6+aU8tec4pwZHKMmf/Jn8xpjME8+IJdK/tuEnL0Srk6jyifoIAFcAH3e/Pyoi155RUfVBVa1X1fqqqqoITWU+VT3rkyPDrZpXhgJ7jpxKWJvGmPQWT2JpBRaG3F8AtEWr
4455lAEnJ9g2WnkHUO7aCO9roj6eVdUOVe0HngAuiuN5ZZ3ugRFODQXPekZYqDmlecwqymX3ETsL3xjjiSexvAzUutlauXgH47eF1dkG3OZu3wQ8pd4p2duADW5G11KgFngpWptum6ddG7g2H4/Rx5PABSJS6BLOVcDu+F+C7NGSoKnGoUSEc+eW0Nzex8joWMLaNcakr5iJxR3vuBPvC3wPsFVVG0XkPhH5iKv2EFApIk3AXcA9bttGYCveF/0vgTtUdTRam66tu4G7XFuVru2J+ugEvoGXrHYCr6jqv0/1BclkrZ3eqsYLKqa+nEsky6uLCY6pLUppjAFAsnGtp/r6em1oaEh2GDNm/MD77/e184tdR/nrP66jINefsPaHgqN89ed7uHx5JevOn8eta7N3coQxmUxEdqhqfax6duZ9FukaGCEv4CM/J7Fve17Az6LKQvYd701ou8aY9GSJJYt09Y9QVpCDd/pPYtVWF3Oke5DeoWDsysaYjGaJJYt09Q9P6Tr38VheXQxAk41ajMl6lliySFf/yJQuRxyP+eUFFOT4aTpu57MYk+0ssWSJoeAoAyOjlBdMT2LxibC8upim47128S9jspwllizR1e+ty1k+TbvCwNsd1jMYtN1hxmQ5SyxZ4p3EMn3reS2tLAJgx9ud09aHMSb1WWLJEl0Dw8D0jlgqi3MpyPHzakvXtPVhjEl9lliyRFf/CD6Bkvx4FrSeGhFh0axCXmmxEYsx2cwSS5bo6h+mrCAH3zScwxJq4SzvRMnuAbvWmjHZyhJLlvCmGk/fbrBx40vyv3bIdocZk60ssWSJroGRaZtqHGpBRQEi2O4wY7KYJZYsMDqm9AzMzIglP8fPiuoSO4BvTBazxJIFegZGUKZ3qnGoixaX82pLJ2NjdqKkMdnIEksW6BqY/nNYQl24qIKewSDNHXaipDHZyBJLFujqd+ewFEz/rjCAixaVA/CK7Q4zJivFlVhEZJ2I7BWRJhG5J8LjeSLyqHt8u4gsCXnsXle+V0Suj9Wmu1zxdhHZ59rMnagPEVkiIgMistP9fGeqL0am6pyBs+5DLZtdTGl+wI6zGJOlYiYWEfED9wM3AHXALSJSF1btdqBTVZcDm4FNbts6vOvZrwLWAQ+IiD9Gm5uAzapaC3S6tqP24exX1dXu5wuTegWyQPfAMEV5AXL8MzNA9fmE82vKaGzrnpH+jDGpJZ5vmjVAk6o2q+owsAVYH1ZnPfCwu/0YcK14V5NaD2xR1SFVPQA0ufYitum2uca1gWvzxhh9mBi6+mdmqnGo82vKePPIKUZGx2a0X2NM8sWTWGqAQyH3W11ZxDqqGgS6gcoJto1WXgl0uTbC+4rWB8BSEXlVRJ4VkfdHehIislFEGkSkob29PY6nnTmm8zos0ayaX8rw6Bj7jtkBfGOyTTyJJdKoIHweabQ6iSqfqI8jwCJVvRC4C3hERErPqKj6oKrWq2p9VVVVhKYyk6rSNTB9V46M5vyaMgB22e4wY7JOPImlFVgYcn8B0BatjogEgDLg5ATbRivvAMpdG+F9RezD7WY7AaCqO4D9wIo4nldWONk3zMioUjbDu8KWVhZRlOun8bAlFmOyTTyJ5WWg1s3WysU7GL8trM424DZ3+ybgKfUuI7gN2OBmdC0FaoGXorXptnnatYFr8/GJ+hCRKjcZABFZ5vpojv8lyGxtXYMAVMzwrjCfT6ibX8qutp4Z7dcYk3wx11BX1aCI3Ak8CfiB76tqo4jcBzSo6jbgIeBHItKEN1LZ4LZtFJGtwG4gCNyhqqMAkdp0Xd4NbBGRrwKvuraJ1gdwJXCfiASBUeALqnpy6i9JZjnc1Q9M73VYolk1v4xHXz7E6Jji99k8C2OyhWTj9cnr6+u1oaEh2WHMiO/9vpmv/vse/upD51GYN33XYonklbc7eeyVVr54bS1f/KDtnTQm3YnIDlWtj1XPzrzPcG1dg+T6fRTk+me87/nlBV4M3QMz3rcx
JnkssWS4w139lBfmkIxTfqpK8gj45PRxHmNMdrDEkuEOdw3M+Dks4/w+YW5ZPoe7bMRiTDaxxJLh2roGZ2zxyUjmlxfQ1jVgS+gbk0UssWSw/uEgJ/uGkzZiAagpK2AoOMahzv6kxWCMmVmWWDLY+LGNZCaW8QP4uw7b+SzGZAtLLBls/NhGMneFzSnNwye2tIsx2cQSSwY73OkSSxJHLAG/jzml+eyypV2MyRqWWDJYW9cAfp9Qkp+8xALe7rDGth6y8WRcY7KRJZYMdrhrgLml+UlfTmV+eQEn+4Y50m3nsxiTDSyxZLDDnQPUuIPnyVRTlg9gu8OMyRKWWDLY4a4BaiqSn1jmlhW4A/g2M8yYbGCJJUMFR8c42jOYEiOW3ICPc6qK7dosxmQJSywZ6mjPIKNjmhIjFvCuKGlTjo3JDpZYMlTLSe9M90WzCpMciWfV/FKO9Qxx/JQdwDcm01liyVCHUiyxnF9TBkCjnYFvTMaLK7GIyDoR2SsiTSJyT4TH80TkUff4dhFZEvLYva58r4hcH6tNd7ni7SKyz7WZG6sP9/giEekVkf8+2RchE7Wc7MfvE+a5GVnJtmp+KSKw81BXskMxxkyzmInFXU/+fuAGoA64RUTqwqrdDnSq6nJgM7DJbVuHdwnhVcA64AER8cdocxOwWVVrgU7XdtQ+QmwGfhHvE890LSe9qcYBf2oMSkvyc1g5p4RXWjqTHYoxZprF862zBmhS1WZVHQa2AOvD6qwHHna3HwOuFe/KUuuBLao6pKoHgCbXXsQ23TbXuDZwbd4Yow9E5EagGWiM/6lntkMn+1NmN9i4ixZXsLOly5bQNybDxZNYaoBDIfdbXVnEOqoaBLqBygm2jVZeCXS5NsL7itiHiBQBdwNfmehJiMhGEWkQkYb29vYYTzn9HTrZz8IUSywXL6rg1FCQfcd7kx2KMWYaxZNYIq0HEv4vZ7Q6iSqfqI+v4O06m/DbSlUfVNV6Va2vqqqaqGra6x0KcqJvOCVHLIDtDjMmw8WTWFqBhSH3FwBt0eqISAAoA05OsG208g6g3LUR3le0PtYCXxeRg8AXgb8UkTvjeF4ZK9VmhI1bUlnIrKJcdrxticWYTBZPYnkZqHWztXLxDsZvC6uzDbjN3b4JeEq9pWy3ARvcjK6lQC3wUrQ23TZPuzZwbT4+UR+q+n5VXaKqS4C/B/4fVf3WJF6DjJNq57CMExEuWlRuIxZjMlzMxOKOZ9wJPAnsAbaqaqOI3CciH3HVHsI73tEE3AXc47ZtBLYCu4FfAneo6mi0Nl1bdwN3ubYqXdtR+zBnStURC8CFiypobu+js2842aEYY6ZJIHYVUNUngCfCyr4ccnsQuDnKtl8DvhZPm668GW/WWHh51D5C6vzNRI9ni5aT/ZTmByhL4gW+ornYHWd59VAn15w7J8nRGGOmQ2qc5GASquVkP4sqU2+0AnDBgjL8PuGVt+1ESWMylSWWDNSSguewjCvMDVA3r5SXD55MdijGmGliiSXDjI0prScHUu4cllCXnVPJKy2d9A4FY1c2xqQdSywZ5tipQYZHx1J2xAJw1coqRkaV55s6kh2KMWYaWGLJMC0nUndG2Lj6xbMoyvXzzFuZvwKCMdnIEkuGSdVzWELlBnxcvnw2z+5txzt1yRiTSSyxZJi3T3jL5c9PgUsST+TqldUc7hqgydYNMybjWGLJMAc6+lg0q5CcFFkuP5qrV3rrtT2z13aHGZNpUvvbx0za/vZels4uSnYYMc0vL2DFnGKeeet4skMxxiSYJZYMMjamHDzRx7I0SCzg7Q576cBJm3ZsTIaxxJJBjvQMMjgyxtKq9Egsf3TeHEZGlV81Hk12KMaYBLLEkkEOtPcBsGx2cZIjic/7llSwaFYh/9LQmuxQjDEJZIklgzR3eDOslqXJiEVEuOniBbzQfOL0iszGmPQX1+rGJj00t/dRlOunuiQv2aGc4ZHtLRHL//TiBWz+zVs8tqOV//rB
FTMclTFmOtiIJYM0d/SxtKoIkUhXcU5NNeUFXH7ObB7b0crYmJ0saUwmsBFLBmlu7+WiRRXJDmPSbq5fwF9s2cmLzSe4bPnsqKObW9cumuHIjDFTEdeIRUTWicheEWkSkTOu3OguPfyoe3y7iCwJeexeV75XRK6P1aa7XPF2Ednn2sydqA8RWSMiO93PayLy0am+GOnkke0t7/p5+PmDHO4cSJvjK6GuXzWX0vwA/7z97WSHYoxJgJiJRUT8wP3ADUAdcIuI1IVVux3oVNXlwGZgk9u2Du969quAdcADIuKP0eYmYLOq1gKdru2ofQC7gHpVXe36+K6IZN1I7ETfMAppcXJkuPwcP5+4ZDG/2HWUAx19yQ7HGHOW4hmxrAGaVLVZVYeBLcD6sDrrgYfd7ceAa8Xb0b8e2KKqQ6p6AGhy7UVs021zjWsD1+aNE/Whqv2qOn6GXT6QlTvqO04NAfDWsd4zRjPp4DOXLyXH7+PB3+1PdijGmLMUz3/2NcChkPutwNpodVQ1KCLdQKUrfzFs2xp3O1KblUBXSKIIrR+tjw4RWQt8H1gMfDJk+9NEZCOwEWDRoszbV9/R6yWW2UW5SY5kckIT34ULy9na0MriWUWUFuQkMSpjzNmIZ8QSaYpR+KggWp1ElU8Yh6puV9VVwPuAe0Uk/4yKqg+qar2q1ldVVUVoKr119A5Tmh8gL8ef7FCm7P21VYyNKX/YbxcAMyadxZNYWoGFIfcXAG3R6rjjG2XAyQm2jVbeAZSHHCMJ7StaH6ep6h6gDzg/jueVUTp6h6gsTr3zVyZjVlEu71lQxvYDJ+kftvXDjElX8SSWl4FaN1srF+9g/LawOtuA29ztm4Cn1LuC0zZgg5vRtRSoBV6K1qbb5mnXBq7Nxyfqw7URABCRxcBK4GDcr0AGUFXaTw1RlYInRk7W1SuqGQ6O8fz+E8kOxRgzRTGPsbjjGXcCTwJ+4Puq2igi9wENqroNeAj4kYg04Y0iNrhtG0VkK7AbCAJ3qOooQKQ2XZd3A1tE5KvAq65tovUBXAHcIyIjwBjw56qaVftSTg0GGRgZZU4GJJa5ZfnUzSvl+f0dXLF8NvlpvGvPmGwV17RcVX0CeCKs7MshtweBm6Ns+zXga/G06cqb8WaNhZdH7ENVfwT8KOaTyGDHegYBmFN6xqGltPSBldXsPtLDi80nuHpldbLDMcZMki3pkgGOuanG1RmSWGoqvIuAPdfUwVBwNNnhGGMmyRJLBjjeM0hRXoDivMw5L/SaldX0D4/y0oGTsSsbY1KKJZYMcKxnMCOOr4RaVFnEOVVF/H5fByOjY8kOxxgzCZZY0pyqcuzUUMbsBgv1gXOr6R0K0nDQRi3GpBNLLGmua2CE4eAYc0oza8QCsLSyiMWVhfxuXwdBG7UYkzYssaS54+Mzwkoyb8QiIlyzsprugRFeaelKdjjGmDhZYklzx3q8GWGZMtU43PLqYhZUFPDsW8dt1GJMmrDEkuaO9QxSmh+gIDczTyQUEa5eUU1n/wj//saRZIdjjImDJZY0d+zUYMaOVsadO6+E6pI8Hnh6v12+2Jg0YIkljY25NcKqM2yqcTifCFetqGLvsVM89ebxZIdjjInBEksa6+wbZmRUM37EAnDBgnJqygt44JkmvLVKjTGpyhJLGjuaYWuETcTvE75w1TJeaeliu52Nb0xKs8SSxo50DyJkR2IBuLl+IbOLc7n/6aZkh2KMmYAlljR2pGuA2SV55Aay423Mz/Hz2SuW8vt9HbzR2p3scIwxUWTHN1KGOtI9yLyy7BitjPvEJYspyQvw7Wdt1GJMqrLEkqb6h4J0DYwwv6wg2aHMqNL8HD556WJ+sesoTcd7kx2OMSaCuBKLiKwTkb0i0iQi90R4PE9EHnWPbxeRJSGP3evK94rI9bHadJca3i4i+1ybuRP1ISIfFJEdIvKG+33NVF+MdHLEHbifV55dIxaAz16xlLyAj289tS/ZoRhjIoiZWETE
D9wP3ADUAbeISF1YtduBTlVdDmwGNrlt6/AuIbwKWAc8ICL+GG1uAjarai3Q6dqO2gfQAXxYVd8D3EaWXE3ySNcAAPOybMQCMLs4j09ftpTHX2ujsc2OtRiTauIZsawBmlS1WVWHgS3A+rA664GH3e3HgGtFRFz5FlUdUtUDQJNrL2KbbptrXBu4Nm+cqA9VfVVV21x5I5AvIpl9xiDe8ZXS/My6uNdk/Kerz6GsIIe//cWbyQ7FGBMmnsRSAxwKud/qyiLWUdUg0A1UTrBttPJKoMu1Ed5XtD5C/SnwqqoOxfG80lpb90BWjlbGlRXkcOcHlvP7fR08t68j2eEYY0LEk1gkQln4qc/R6iSqPGYcIrIKb/fYn0Woh4hsFJEGEWlob2+PVCVtDI6M0n5qKCuPr4T65KWLqSkv4H/9Yg+jtoaYMSkjnsTSCiwMub8AaItWR0QCQBlwcoJto5V3AOWujfC+ovWBiCwAfgp8SlX3R3oSqvqgqtaran1VVVUcTzt17TvWy5hm5/GVUHkBP3ffcC6NbT088lJLssMxxjjxJJaXgVo3WysX72D8trA62/AOnAPcBDyl3oJO24ANbkbXUqAWeClam26bp10buDYfn6gPESkH/h24V1X/MJknn652H/EOWM/PsnNYIvnwBfO4fHklX//lm7Sfyvg9oMakhZiJxR3PuBN4EtgDbFXVRhG5T0Q+4qo9BFSKSBNwF3CP27YR2ArsBn4J3KGqo9HadG3dDdzl2qp0bUftw7WzHPhrEdnpfqqn+Hqkhca2HvICPiqKcpMdStL95KVDrFlSSf/QKBt/2MAj21t4ZLuNXoxJprimFKnqE8ATYWVfDrk9CNwcZduvAV+Lp01X3ow3ayy8PGIfqvpV4Ksxn0QGeb21m3ll+fgk0mGn7FNVksf7V8zmmb3tXLykgmWzi5MdkjFZLTvnqqax4eAYu9t6WLt0VrJDmXETjUSuXlHNa4e62LazjTuvWT6DURljwtmSLmlmz5EehkfHWDCrMNmhpJTcgI8PXzCf46eGeL7pRLLDMSarWWJJM6+1dgGwsCK7Z4RFcu68UurmlfLbN49x2K1MYIyZeZZY0szOli5mF+dRVpCT7FBS0h9fMA+A+37WGKOmMWYov5I7AAAV+ElEQVS6WGJJMztbu1i9sByxA/cRVRTmcs25c3iy8RhPvXks2eEYk5UssaSR7oERmtv7WL2wLNmhpLTLl1eyvLqYLz/eyMDwaLLDMSbrWGJJI6+74yurF1YkOZLUFvD5uHpFFa2dA/z5j3fYuS3GzDBLLGnktUNeYnnPAhuxxLKsqpjVC8v53b4OOyPfmBlmiSWN7DzUxTlVRXbgPk43nD+XHL/ws9fa8FYLMsbMBEssaUJV2Xmom/cuLE92KGmjJD+H6+rm0tTey+uH7YJgxswUSyxpouVkPx29Q1y4yI6vTMaapbOoKS/gideP0DM4kuxwjMkKlljSxAv7vbPJL12WfUu5nA2fCOtXz6d3KMjmX7+V7HCMyQqWWNLEC80nqCrJ45wqW2BxshZUFLJm6Swefv4gjW22S8yY6WaLUKYBVeWF/Se4ZFmlnRg5RdfVzWV/ey9/9X928a9fuAyfb+LXMdr05FvXLpqO8IzJKDZiSQPNHX0cPzXEpcsqkx1K2irI9fOXHzqPV1u62NpwKNnhGJPRLLGkgdPHV86xxHI2PnphDWuXzuJrT+zhYEdfssMxJmPFlVhEZJ2I7BWRJhG5J8LjeSLyqHt8u4gsCXnsXle+V0Suj9Wmu1zxdhHZ59rMnagPEakUkadFpFdEvjXVFyKVvdB8grml+SyptKXyz4aI8Hc3vxe/T9j4owZ6h4LJDsmYjBQzsYiIH7gfuAGoA24RkbqwarcDnaq6HNgMbHLb1uFdz34VsA54QET8MdrcBGxW1Vqg07UdtQ9gEPhr4L9P8rmnBVVle/MJLlk2y46vJMDCWYXcf+tF7G/v479t3cnYmJ04aUyixTNi
WQM0qWqzqg4DW4D1YXXWAw+7248B14r3Lbge2KKqQ6p6AGhy7UVs021zjWsD1+aNE/Whqn2q+hxegsk4+4730tE7bLvBEujy5bP5yw+dx5ONx/jCP++gu9/ObzEmkeJJLDVA6NHOVlcWsY6qBoFuoHKCbaOVVwJdro3wvqL1kdH+0NQBwKXLZic5kszy2cuX8Fd/fB5PvXmcP/7m7/n9vnYbvRiTIPFMN460/yX8LzBanWjlkRLaRPXjjSMqEdkIbARYtCh9pow+2XiU5dXFLLLjKwklInzu/cu4eHEFdz7yKp986CUWzirgo6truPa8OYyp4rNdj8ZMSTyJpRVYGHJ/AdAWpU6riASAMuBkjG0jlXcA5SIScKOS0PrR+oiLqj4IPAhQX1+fFv+advQO8dKBk9z5geXJDiVjXbiogt/cdRW/bDzCv+44zDefbuIfnmqiMNfPhQvLuaK2yhb9NGaS4kksLwO1IrIUOIx3MP7WsDrbgNuAF4CbgKdUVUVkG/CIiHwDmA/UAi/hjT7OaNNt87RrY4tr8/GJ+pja004Pv2o8xpjCDe+Zl+xQMlpBrp+PXriAj164gBO9QzzX1MH3fn+AF5pP8GLzSeqXVHDD+fPIDdjsfGPiETOxqGpQRO4EngT8wPdVtVFE7gMaVHUb8BDwIxFpwhtFbHDbNorIVmA3EATuUNVRgEhtui7vBraIyFeBV13bROvDtXUQKAVyReRG4DpV3T3VFyVV/GLXEZbOLuLcuSXJDiVrVBbnsX51DX1Do3T2DfO7fe28dOAkb5/o5+N21r0xcZEM/6c/ovr6em1oaEh2GBPq7Bum/mu/YeOVy7h73blnPG5XREycSMu0hL6+e4+eYmvDIRTlh59dy5qlthCoyU4iskNV62PVs7XCUtSv9xxjdEz50Pm2G2y6xUrSK+eWcMcHlvNPzx/kMz94iR/evoaLF1tyMSYa22mcop544wgLKgo4v6Y02aEYYFZRLp+7YilVJXl8+vsvs9NdJtoYcyZLLCno7RN9PPtWO+tXz7ez7VNIaUEOj3z+EsqLcvjUQ9vZZVelNCYiSywp6PvPHSDgEz516ZJkh2LCzC8v4Cefv4SS/Bw+8dB2drf1JDskY1KOJZYU09U/zNaGVj7y3hrmlOYnOxwTwYKKQn7y+UsoyPHziYe2s/foqWSHZExKsYP3KebH21sYGBnl81cuPV1mM8BSR+h7ccuaRfzj75v5+PdeZMvGS1lebVf3NAZsxJJShoKj/NPzB3l/7WzOnWsH7VPd7OI8PnfFMkC49R9f5K1jNnIxBmzEklL+8yOv0n5qiA9fMN9GKWmiqiSPn3x+Lbf843Y+/M3nuHvduXz6siVRL31slzw22cBGLClif3svT715nFXzS22XSpqpnVPCE39xBVcsn819P9/Nf/j28zz6cgs9g9GX4x8dU7Lx5GSTHWzEkgLGxpR7/+0NAn7hI++dn+xwzBRUl+Tzvdvq+ZeGVr797H7u/tc3+NJPdzG7OI+Kolx8Ar1DQdpPDTEUHGN0TMnxCyX5OVQV5zEUHOXqldUsnV2U7KdizFmzJV1SwA9fOMiXH2/kP1xYQ/0SO6M73YTvxlJVXmvt5u9//RanBoP0DXuXF8rP8ZMX8JEX8JMb8DE4MkrP4AhtXQN09A4DUL+4gk9euph1588lL+Cf8edizERsSZc08ds9x/jKz3Zz9coqLl5ckexwTAKICKsXlnPdqrlxb3PF8tk82XiUH29/m7/YspPKolw+9r6F3Lp2EQsq7Fo8Jr3YMZYkajh4kj//8SvUzSvlW7deZGfZZ7Hnmjooygvwufcv4zOXL6G6NJ/vPLufK7/+NJ97uIFn32pn1K5wadKEjViS5Jm9x/nPP3mVmvIC/ukz76M4z96KdJXIGXw+EWqrS6itLuGqlVX8ZHsLW15u4Td7jlFZlMvVK6u5csVszq8pY2llUdTZZ8Ykkx1jmWGqyref3c///uVe5pTm86lLF1Ne
mJuUWEx6CI6NsefIKXa3dfPWsV4GRkYBKMr1Uze/lFXzyzhvXgm1c0qorS6mJN+ueGmmhx1jSUFNx3v58uO7eH7/Cd5TU8afXrTArkpoYgr4fLynpoz31JQxOqYcPzVIW9cAh7sGOdI1wGuHWhgeHTtdv6wghzmleVSX5POR1fNZ4RJOkY2KzQyJ65MmIuuA/w/vao/fU9W/DXs8D/ghcDFwAviYqh50j90L3A6MAv9FVZ+cqE13ueItwCzgFeCTqjo8lT5SxbGeQR567gA/+MMB8nP8/N83no8P7JiKmTS/T5hXVsC8sgIuXuyVjanS2TfM8VNDHOsZPP27uf0EzzV1nN62pryAFXOKWTGnhGM9Q5QVBCgtyKGsIIeCHD8fv2Rxkp6VyTQxE4uI+IH7gQ8CrcDLIrIt7NK/twOdqrpcRDYAm4CPiUgd3iWEV+Fd8/43IrLCbROtzU3AZlXdIiLfcW1/e7J9jF8COVmGgqO8sP8E215r42evtTE6pnz0wgXcc8O5VJXk2Zn1JmF8IlQW51FZnMd5895ZCmhMlcvOqeStY73sO3aKt457v//QdOJdIxyAHL/w0HMHmFuWz9yyfOaXFTC3LJ95IffLC3PO+p8hVWVgZJSegSBbXm7BL0LA7yPgEwI+4ROXLibHb6P4dF+hIZ4RyxqgSVWbAURkC7Ae7zr249YDf+NuPwZ8S7xP4Hpgi6oOAQfc9erXuHpntCkie4BrgFtdnYddu9+eQh8vxPkaTJqqMhQcYyg4xnBwjL6hIMdPDXG0Z5C9R3tobOuh4WAnvUNBinL9fHztYj57+VIWVdq0UTNzfCIsqypmWVUx685/Z+pzcHSMB3/XTM/ACN2DQboHRugZGKGsIIcj3QO8uP8Ex04NnTELLS/go7o0j4rCXMrcSKe80BvtjCmogqKowsCwd45Oz+AIPQNB93uEnsHghLPbvvLz3eQGfBTl+inMDVCcF6Awz09xXoD8HD8Bn+ATQcR7fgCjqoyNKaNjyph6v0eVd5Xl5/hPxxz6U1qQQ2l+gKK88R8/RXkBcv0+fCL4fYJPErN3YXTMS6oDw+5nZJTeoXdem1ODwdOv1ytvdzIwMsqg+/H7hNyAj+ea2inMDVCU6z8df3lhLuXuvRh/XnnutfK7hO33yYzuIYknsdQAh0LutwJro9VR1aCIdAOVrvzFsG1r3O1IbVYCXaoajFB/Kn0k1GuHurj5uy8wHByLWsfvE2qri/nwe+fxwbo5XHbObPJz7EQ3kxzR/vMtL8ydcNLImCq9LumM//QMjHBqKEj/cBC/TzjcOUDXwAgDw6Onv3wFQKAw109pvvfFNxwco7wgh7ml+RTk+Ml3P7kBn5cIRpXg2BjBMaVuXil9w6P0DwfpHQrSPzRK33CQt0/0MxwcY0wVxfvnrjgvgAJ+EXw+Of27Z2DkdOIRAUEYGR07/aU+ODLKZKcs+cT72/b7BEFOJ1AFcAkVOF2mp+OcZEd4o8fcgJ+CHJ/3WgX8jKnSPzzqTd4YfichTab98fj/5D3z+MbHVk8+sEmIJ7FESnPhTydanWjlkca6E9WfSh/vDlBkI7DR3e0Vkb0Rtpuq2cDpndnNwJPA30atPiPeFVMKScW4UjEmSM24UjEmSM24UjEmNsPszRumHFdcB+LiSSytwMKQ+wuAtih1WkUkAJQBJ2NsG6m8AygXkYAbtYTWn0ofp6nqg8CDcTzfSRORhnim4M2kVIwJUjOuVIwJUjOuVIwJUjOuVIwJZiaueI6SvQzUishSEcnFO1C+LazONuA2d/sm4Cn1TpDZBmwQkTw326sWeClam26bp10buDYfn2IfxhhjkiDmiMUdz7gTb++OH/i+qjaKyH1Ag6puAx4CfuQOnJ/ESxS4elvxDvQHgTvGZ2tFatN1eTewRUS+Crzq2mYqfRhjjJl5WXnmfaKJyEa3qy1lpGJMkJpxpWJMkJpxpWJMkJpxpWJMMDNxWWIxxhiTUHYmkjHGmMRS
VfuZ4g+wDtgLNAH3JLDd7wPHgV0hZbOAXwP73O8KVy7AP7gYXgcuCtnmNld/H3BbSPnFwBtum3/gnZFrxD7cYwvxJlbsARqBv0h2XEA+3kSN11xMX3HlS4Htrv6jQK4rz3P3m9zjS0L6vteV7wWuj/UeR+sj5HE/3jHCn6dQTAfd67sT7/hoUt8/91g53gnPb+J9ti5NgZhWutdo/KcH+GIKxPVf8T7nu4Cf4H3+k/65ivgdNhNfwJn4g/fFsR9YBuTifbnVJajtK4GLeHdi+fr4mw3cA2xytz8E/MJ9uC8Btod8QJvd7wp3e/wP4SW8P2Bx294wUR/u/rzxPxigBHgLqEtmXK5esbud4z78lwBbgQ2u/DvAf3K3/xz4jru9AXjU3a5z71+e+yPa797fqO9xtD5CXq+7gEd4J7GkQkwHgdlhZcn+XD0MfM7dzsVLNEmNKcLf+VG88zeS+VmvAQ4ABSHv9aejvefM4Ocq4us201/ImfLjPhRPhty/F7g3ge0v4d2JZS8wz92eB+x1t78L3BJeD7gF+G5I+Xdd2TzgzZDy0/Wi9RElvsfx1npLibiAQrxFS9finQ8VCH+f8GYhXupuB1w9CX/vxutFe4/dNhH7cPcXAL/FW57o5xPVn6mYXNlBzkwsSXv/gFK8L0tJlZgifK6uA/6Q7Lh4Z+WRWe5z8nPg+mjvOTP4uYr0Y8dYpi7SUjfTspSMM0dVjwC439Ux4piovDVC+UR9vIuILAEuxBshJDUuEfGLyE68XYe/xvuvK65lgYDQZYEmE+tESw8B/D3wP4HxtX/iXqpoGmMCb0WKX4nIDrcSBST3/VsGtAM/EJFXReR7IlKU5JjCbcDb7TTRNtMel6oeBv4OaAGO4H1OdpAan6szWGKZuriWkpkBk13q5qziFpFi4F+BL6pqT7LjUtVRVV2NN0pYA5w3QTuJiilqrCLyJ8BxVd0R8lgilyo6m9fvclW9CLgBuENEroywzbiZeP8CeLt8v62qFwJ9eLt/khnTO515J29/BPiXWFWnOy4RqcBbcHcp3iruRXjvY7R2ZvJzdQZLLFMX11IyCXRMROYBuN/HY8QxUfmCCOUT9YEry8FLKj9W1X9LlbgAVLULeAZvH3e5W/YnvJ3Tfce5LFC08tNLD0Xo43LgIyJyEO+6QtfgjWCSGdP4a9Tmfh8HfoqXiJP5/rUCraq63d1/DC/RpMRnCu+L+xVVPRbH85juuP4IOKCq7ao6AvwbcBkp8LmKxBLL1MWz1E0ihS5pcxvvXurmU+K5BOh2Q+gngetEpML9t3Md3r7RI8ApEbnEXXbgU0ReNie0D1zdh4A9qvqNVIhLRKpEpNzdLsD749tD4pYFmvTSQ6p6r6ouUNUlrv5TqvrxZMbkXp8iESkZv+1e913JfP9U9ShwSERWuseuxVtBI6mf9RC38M5usIm2mYm4WoBLRKTQbTP+WiX1cxVVrIMw9jPhAfYP4c2O2g98KYHt/gRvP+oI3n8St+Pt6/wt3pS/3wKzXF3Bu2jafrzpi/Uh7XwWb+pgE/CZkPJ6vC+V/cC3eGeqY8Q+3GNX4A2BX+edaZgfSmZcwAV4U3pfd9t92ZUvc38sTXi7MfJceb673+QeXxbS95dcv3txM3Qmeo+j9RH2Pl7NO7PCkhqTe+w13pma/aUYr+1Mfa5WAw3uPfw/eLOnkhqTe7wQ70q1ZSFlyX6tvoI3LXsX8CO8mV0p8VkP/7Ez740xxiSU7QozxhiTUJZYjDHGJJQlFmOMMQllicUYY0xCWWIxxhiTUJZYjJkiEfmSiDSKyOsislNE1k5Q959E5KZoj4fUOeDaekVELo1S7wsi8qmzjd+Y6RLz0sTGmDO5L/0/wVvxeUhEZuOtCnu2/oeqPiYi1+EtWnhBWL8BVf1OAvoxZtpYYjFmauYBHao6BKCqHQAi8mXgw0AB8DzwZxp2spiIXAx8AyjGWzLj0+oWHgzxO2C5q/+Ma+tyYJs7g75XVf9O
RJbjLWVeBYwCN6vqfhH5H8B/xDuJ7qeq+n8l+PkbE5XtCjNman4FLBSRt0TkARG5ypV/S1Xfp6rn4yWXPwndSLz11r4J3KSqF+Nd1O1rEdr/MN5Z3OPKVfUqVf1/w+r9GLhfVd+Lt3bUETfaqcVbC2w1cLFMvOCkMQllIxZjpkBVe93I4/3AB4BHReQevDWg/ifekiCz8JZP+VnIpiuB84Ffe0s+4cdbvmfc/xaRv8JbTv72kPJHw2NwI5caVf2pi2nQlV+Hty7Vq65qMV6i+d3ZPGdj4mWJxZgpUtVRvBWVnxGRN4A/wzsmUq+qh0Tkb/DWbAolQKOqRjwwjzvGEqG8L0JZpCXNx8v/l6p+N8ZTMGZa2K4wY6ZARFaKSG1I0Wq8Rf0AOsS7bk2kWWB7garxGV8ikiMiq6YSg3rXw2kVkRtdW3kiUoi3qu5nXQyISI2IRLuQlTEJZyMWY6amGPimW7Y/iLfy60agC+/YyEG8pcjfRVWH3bTjfxCRMry/wb/H22U2FZ8Evisi9+Gthn2zqv5KRM4DXnC723qBTxD5miPGJJytbmyMMSahbFeYMcaYhLLEYowxJqEssRhjjEkoSyzGGGMSyhKLMcaYhLLEYowxJqEssRhjjEkoSyzGGGMS6v8HU45PteWT2ScAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "#绘制SalePrice分布直方图\n",
    "sns.distplot(df_train['SalePrice'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Id不能作为特征，丢弃这一列\n",
    "df_train.drop(['Id'],axis = 1, inplace = True)\n",
    "df_test.drop(['Id'],axis = 1, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1460 entries, 0 to 1459\n",
      "Data columns (total 80 columns):\n",
      "MSSubClass       1460 non-null int64\n",
      "MSZoning         1460 non-null object\n",
      "LotFrontage      1201 non-null float64\n",
      "LotArea          1460 non-null int64\n",
      "Street           1460 non-null object\n",
      "Alley            91 non-null object\n",
      "LotShape         1460 non-null object\n",
      "LandContour      1460 non-null object\n",
      "Utilities        1460 non-null object\n",
      "LotConfig        1460 non-null object\n",
      "LandSlope        1460 non-null object\n",
      "Neighborhood     1460 non-null object\n",
      "Condition1       1460 non-null object\n",
      "Condition2       1460 non-null object\n",
      "BldgType         1460 non-null object\n",
      "HouseStyle       1460 non-null object\n",
      "OverallQual      1460 non-null int64\n",
      "OverallCond      1460 non-null int64\n",
      "YearBuilt        1460 non-null int64\n",
      "YearRemodAdd     1460 non-null int64\n",
      "RoofStyle        1460 non-null object\n",
      "RoofMatl         1460 non-null object\n",
      "Exterior1st      1460 non-null object\n",
      "Exterior2nd      1460 non-null object\n",
      "MasVnrType       1452 non-null object\n",
      "MasVnrArea       1452 non-null float64\n",
      "ExterQual        1460 non-null object\n",
      "ExterCond        1460 non-null object\n",
      "Foundation       1460 non-null object\n",
      "BsmtQual         1423 non-null object\n",
      "BsmtCond         1423 non-null object\n",
      "BsmtExposure     1422 non-null object\n",
      "BsmtFinType1     1423 non-null object\n",
      "BsmtFinSF1       1460 non-null int64\n",
      "BsmtFinType2     1422 non-null object\n",
      "BsmtFinSF2       1460 non-null int64\n",
      "BsmtUnfSF        1460 non-null int64\n",
      "TotalBsmtSF      1460 non-null int64\n",
      "Heating          1460 non-null object\n",
      "HeatingQC        1460 non-null object\n",
      "CentralAir       1460 non-null object\n",
      "Electrical       1459 non-null object\n",
      "1stFlrSF         1460 non-null int64\n",
      "2ndFlrSF         1460 non-null int64\n",
      "LowQualFinSF     1460 non-null int64\n",
      "GrLivArea        1460 non-null int64\n",
      "BsmtFullBath     1460 non-null int64\n",
      "BsmtHalfBath     1460 non-null int64\n",
      "FullBath         1460 non-null int64\n",
      "HalfBath         1460 non-null int64\n",
      "BedroomAbvGr     1460 non-null int64\n",
      "KitchenAbvGr     1460 non-null int64\n",
      "KitchenQual      1460 non-null object\n",
      "TotRmsAbvGrd     1460 non-null int64\n",
      "Functional       1460 non-null object\n",
      "Fireplaces       1460 non-null int64\n",
      "FireplaceQu      770 non-null object\n",
      "GarageType       1379 non-null object\n",
      "GarageYrBlt      1379 non-null float64\n",
      "GarageFinish     1379 non-null object\n",
      "GarageCars       1460 non-null int64\n",
      "GarageArea       1460 non-null int64\n",
      "GarageQual       1379 non-null object\n",
      "GarageCond       1379 non-null object\n",
      "PavedDrive       1460 non-null object\n",
      "WoodDeckSF       1460 non-null int64\n",
      "OpenPorchSF      1460 non-null int64\n",
      "EnclosedPorch    1460 non-null int64\n",
      "3SsnPorch        1460 non-null int64\n",
      "ScreenPorch      1460 non-null int64\n",
      "PoolArea         1460 non-null int64\n",
      "PoolQC           7 non-null object\n",
      "Fence            281 non-null object\n",
      "MiscFeature      54 non-null object\n",
      "MiscVal          1460 non-null int64\n",
      "MoSold           1460 non-null int64\n",
      "YrSold           1460 non-null int64\n",
      "SaleType         1460 non-null object\n",
      "SaleCondition    1460 non-null object\n",
      "SalePrice        1460 non-null int64\n",
      "dtypes: float64(3), int64(34), object(43)\n",
      "memory usage: 912.6+ KB\n"
     ]
    }
   ],
   "source": [
    "df_train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MSSubClass        0.000000\n",
       "MSZoning          0.000000\n",
       "LotFrontage      17.739726\n",
       "LotArea           0.000000\n",
       "Street            0.000000\n",
       "Alley            93.767123\n",
       "LotShape          0.000000\n",
       "LandContour       0.000000\n",
       "Utilities         0.000000\n",
       "LotConfig         0.000000\n",
       "LandSlope         0.000000\n",
       "Neighborhood      0.000000\n",
       "Condition1        0.000000\n",
       "Condition2        0.000000\n",
       "BldgType          0.000000\n",
       "HouseStyle        0.000000\n",
       "OverallQual       0.000000\n",
       "OverallCond       0.000000\n",
       "YearBuilt         0.000000\n",
       "YearRemodAdd      0.000000\n",
       "RoofStyle         0.000000\n",
       "RoofMatl          0.000000\n",
       "Exterior1st       0.000000\n",
       "Exterior2nd       0.000000\n",
       "MasVnrType        0.547945\n",
       "MasVnrArea        0.547945\n",
       "ExterQual         0.000000\n",
       "ExterCond         0.000000\n",
       "Foundation        0.000000\n",
       "BsmtQual          2.534247\n",
       "BsmtCond          2.534247\n",
       "BsmtExposure      2.602740\n",
       "BsmtFinType1      2.534247\n",
       "BsmtFinSF1        0.000000\n",
       "BsmtFinType2      2.602740\n",
       "BsmtFinSF2        0.000000\n",
       "BsmtUnfSF         0.000000\n",
       "TotalBsmtSF       0.000000\n",
       "Heating           0.000000\n",
       "HeatingQC         0.000000\n",
       "CentralAir        0.000000\n",
       "Electrical        0.068493\n",
       "1stFlrSF          0.000000\n",
       "2ndFlrSF          0.000000\n",
       "LowQualFinSF      0.000000\n",
       "GrLivArea         0.000000\n",
       "BsmtFullBath      0.000000\n",
       "BsmtHalfBath      0.000000\n",
       "FullBath          0.000000\n",
       "HalfBath          0.000000\n",
       "BedroomAbvGr      0.000000\n",
       "KitchenAbvGr      0.000000\n",
       "KitchenQual       0.000000\n",
       "TotRmsAbvGrd      0.000000\n",
       "Functional        0.000000\n",
       "Fireplaces        0.000000\n",
       "FireplaceQu      47.260274\n",
       "GarageType        5.547945\n",
       "GarageYrBlt       5.547945\n",
       "GarageFinish      5.547945\n",
       "GarageCars        0.000000\n",
       "GarageArea        0.000000\n",
       "GarageQual        5.547945\n",
       "GarageCond        5.547945\n",
       "PavedDrive        0.000000\n",
       "WoodDeckSF        0.000000\n",
       "OpenPorchSF       0.000000\n",
       "EnclosedPorch     0.000000\n",
       "3SsnPorch         0.000000\n",
       "ScreenPorch       0.000000\n",
       "PoolArea          0.000000\n",
       "PoolQC           99.520548\n",
       "Fence            80.753425\n",
       "MiscFeature      96.301370\n",
       "MiscVal           0.000000\n",
       "MoSold            0.000000\n",
       "YrSold            0.000000\n",
       "SaleType          0.000000\n",
       "SaleCondition     0.000000\n",
       "SalePrice         0.000000\n",
       "dtype: float64"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#查看每列缺失值数量占比\n",
    "pd.options.display.max_rows = None\n",
    "display(((df_train.isnull().sum()/len(df_train)*100)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {},
   "outputs": [],
   "source": [
    "#丢弃缺失值占比超过80%的列\n",
    "df_train.drop({'Alley','Fence','MiscFeature','PoolQC'}, axis = 1, inplace = True)\n",
    "df_test.drop({'Alley','Fence','MiscFeature','PoolQC'}, axis = 1, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [],
   "source": [
    "#区分数值特征与分类特征\n",
    "cat_variables =  [ i for i in df_train.columns if df_train.dtypes[i]=='object' ]\n",
    "num_variables = [ i for i in df_train.columns if df_train.dtypes[i]!='object' ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['MSSubClass',\n",
       " 'LotFrontage',\n",
       " 'LotArea',\n",
       " 'OverallQual',\n",
       " 'OverallCond',\n",
       " 'YearBuilt',\n",
       " 'YearRemodAdd',\n",
       " 'MasVnrArea',\n",
       " 'BsmtFinSF1',\n",
       " 'BsmtFinSF2',\n",
       " 'BsmtUnfSF',\n",
       " 'TotalBsmtSF',\n",
       " '1stFlrSF',\n",
       " '2ndFlrSF',\n",
       " 'LowQualFinSF',\n",
       " 'GrLivArea',\n",
       " 'BsmtFullBath',\n",
       " 'BsmtHalfBath',\n",
       " 'FullBath',\n",
       " 'HalfBath',\n",
       " 'BedroomAbvGr',\n",
       " 'KitchenAbvGr',\n",
       " 'TotRmsAbvGrd',\n",
       " 'Fireplaces',\n",
       " 'GarageYrBlt',\n",
       " 'GarageCars',\n",
       " 'GarageArea',\n",
       " 'WoodDeckSF',\n",
       " 'OpenPorchSF',\n",
       " 'EnclosedPorch',\n",
       " '3SsnPorch',\n",
       " 'ScreenPorch',\n",
       " 'PoolArea',\n",
       " 'MiscVal',\n",
       " 'MoSold',\n",
       " 'YrSold',\n",
       " 'SalePrice']"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "num_variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [],
   "source": [
    "#将数值特征中有缺失值的用该列中位数填充\n",
    "df_train['BsmtFinSF1'].fillna(df_train['BsmtFinSF1'].median(),inplace = True)\n",
    "df_test['BsmtFinSF1'].fillna(df_test['BsmtFinSF1'].median(), inplace = True)\n",
    "\n",
    "df_train['BsmtFinSF2'].fillna(df_train['BsmtFinSF2'].median(), inplace = True)\n",
    "df_test['BsmtFinSF2'].fillna(df_test['BsmtFinSF2'].median(), inplace = True)\n",
    "\n",
    "df_train['BsmtFullBath'].fillna(df_train['BsmtFullBath'].median(), inplace = True)\n",
    "df_test['BsmtFullBath'].fillna(df_test['BsmtFullBath'].median(), inplace = True)\n",
    "\n",
    "df_train['BsmtHalfBath'].fillna(df_train['BsmtHalfBath'].median(), inplace = True)\n",
    "df_test['BsmtHalfBath'].fillna(df_test['BsmtHalfBath'].median(), inplace = True)\n",
    "\n",
    "df_train['BsmtUnfSF'].fillna(df_train['BsmtUnfSF'].median(), inplace = True)\n",
    "df_test['BsmtUnfSF'].fillna(df_test['BsmtUnfSF'].median(), inplace = True)\n",
    "\n",
    "df_train['GarageArea'].fillna(df_train['GarageArea'].median(), inplace = True)\n",
    "df_test['GarageArea'].fillna(df_test['GarageArea'].median(), inplace = True)\n",
    "\n",
    "df_train['GarageCars'].fillna(df_train['GarageCars'].median(), inplace = True)\n",
    "df_test['GarageCars'].fillna(df_test['GarageCars'].median(), inplace = True)\n",
    "\n",
    "df_train['GarageYrBlt'].fillna(df_train['GarageYrBlt'].median(), inplace = True)\n",
    "df_train['GarageYrBlt'].fillna(df_train['GarageYrBlt'].median(), inplace = True)\n",
    "\n",
    "df_train['LotFrontage'].fillna(df_train['LotFrontage'].median(), inplace = True)\n",
    "df_test['LotFrontage'].fillna(df_test['LotFrontage'].median(), inplace = True)\n",
    "\n",
    "df_train['MasVnrArea'].fillna(df_train['MasVnrArea'].median(), inplace = True)\n",
    "df_test['MasVnrArea'].fillna(df_test['MasVnrArea'].median(), inplace = True)\n",
    "\n",
    "df_train['TotalBsmtSF'].fillna(df_train['TotalBsmtSF'].median(), inplace = True)\n",
    "df_test['TotalBsmtSF'].fillna(df_train['TotalBsmtSF'].median(), inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "查看描述文件data_description，发现分类特征中有些特征的缺失值NA实际代表了一种分类类别，\n",
    "例如对于BsmtCond( Evaluates the general condition of the basement), NA代表No Basement(没有地下室)\n",
    "对此类特征缺失值,填充具有相应含义的字符串（例如NB）；对除此类外的分类特征中有缺失值的用该列一个众数填充。\n",
    "\"\"\"\n",
    "df_train['Electrical'].fillna(df_train['Electrical'].mode()[0], inplace = True)\n",
    "df_test['Electrical'].fillna(df_test['Electrical'].mode()[0], inplace = True)\n",
    "\n",
    "df_train['Exterior1st'].fillna(df_train['Exterior1st'].mode()[0], inplace = True)\n",
    "df_test['Exterior1st'].fillna(df_test['Exterior1st'].mode()[0], inplace = True)\n",
    "\n",
    "df_train['Exterior2nd'].fillna(df_train['Exterior2nd'].mode()[0], inplace = True)\n",
    "df_test['Exterior2nd'].fillna(df_test['Exterior2nd'].mode()[0], inplace = True)\n",
    "\n",
    "df_train['Functional'].fillna(df_train['Functional'].mode()[0], inplace = True)\n",
    "df_test['Functional'].fillna(df_test['Functional'].mode()[0], inplace = True)\n",
    "\n",
    "df_train['KitchenQual'].fillna(df_train['KitchenQual'].mode()[0], inplace = True)\n",
    "df_test['KitchenQual'].fillna(df_test['KitchenQual'].mode()[0], inplace = True)\n",
    "\n",
    "df_train['MSZoning'].fillna(df_train['MSZoning'].mode()[0], inplace = True)\n",
    "df_test['MSZoning'].fillna(df_test['MSZoning'].mode()[0], inplace = True)\n",
    "\n",
    "df_train['MasVnrType'].fillna(df_train['MasVnrType'].mode()[0], inplace = True)\n",
    "df_test['MasVnrType'].fillna(df_test['MasVnrType'].mode()[0], inplace = True)\n",
    "\n",
    "df_train['SaleType'].fillna(df_train['SaleType'].mode()[0], inplace = True)\n",
    "df_test['SaleType'].fillna(df_test['SaleType'].mode()[0], inplace = True)\n",
    "\n",
    "df_train['Utilities'].fillna(df_train['Utilities'].mode()[0], inplace = True)\n",
    "df_test['Utilities'].fillna(df_test['Utilities'].mode()[0], inplace = True)\n",
    "\n",
    "df_train.fillna({'GarageType':'NG', 'GarageQual':'NG','GarageFinish':'NG','GarageCond':'NG','FireplaceQu':'NF',\n",
    "                 'BsmtQual':'NB','BsmtCond':'NB','BsmtExposure':'NB','BsmtFinType1':'NB','BsmtFinType2':'NB',\n",
    "                }, inplace=True)\n",
    "df_test.fillna({'GarageType':'NG', 'GarageQual':'NG','GarageFinish':'NG','GarageCond':'NG','FireplaceQu':'NF',\n",
    "                 'BsmtQual':'NB','BsmtCond':'NB','BsmtExposure':'NB','BsmtFinType1':'NB','BsmtFinType2':'NB',\n",
    "                }, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "分类特征有两类：有序类别型和无序类别型。\n",
    "对于有序类别型，例如ExterCond: Ex、Gd、TA、Fa、Po可视为一种递进的顺序，依次编码为0、1、2、3、4。\n",
    "下面对有序类别型的所有特征进行此一编码操作。\n",
    "\n",
    "ord_dict = {\"LotShape\": ['Reg','IR1','IR2','IR3'],\n",
    "            \"LandSlope\" : [\"Gtl\", \"Mod\", \"Sev\" ],\n",
    "            \"ExterQual\": [  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\" ],\n",
    "            \"ExterCond\": [  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\" ],\n",
    "            \"BsmtQual\": [  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NB\" ],\n",
    "            \"BsmtCond\":[  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NB\" ],\n",
    "            \"BsmtExposure\": [\"Gd\", \"Av\", \"Mn\", \"No\", \"NB\"],\n",
    "            \"BsmtFinType1\":[ \"GLQ\",\"ALQ\",\"BLQ\",\"Rec\",\"LwQ\",\"Unf\",\"NB\"],\n",
    "            \"BsmtFinType2\":[ \"GLQ\",\"ALQ\",\"BLQ\",\"Rec\",\"LwQ\",\"Unf\",\"NB\"],\n",
    "            \"HeatingQC\": [  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\"],\n",
    "            \"KitchenQual\": [  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\"],\n",
    "            \"FireplaceQu\": [  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NF\" ],\n",
    "            \"GarageQual\":[  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NG\"],\n",
    "            \"GarageCond\": [  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NG\" ],\n",
    "            \"GarageFinish\";[\"Fin\",\"RFn\",\"Unf\",\"NG\"]\n",
    "            \"Utilities\":  ['AllPub','NoSeWr','NoSeWa','ELO']\n",
    "        }\n",
    "cols_ord = ord_dict.keys()\n",
    "le = LabelEncoder()\n",
    "\n",
    "for col in cols_ord:\n",
    "    Le = le.fit(ord_dict[col])\n",
    "    df_train[col] = Le.transform(df_train[col])\n",
    "    df_test[col] = Le.transform(df_test[col])\n",
    "\"\"\"\n",
    "Le = LabelEncoder()\n",
    "\n",
    "le = Le.fit(['Reg','IR1','IR2','IR3'])\n",
    "df_train[\"LotShape\"] = le.transform(df_train[\"LotShape\"])\n",
    "df_test[\"LotShape\"] = le.transform(df_test[\"LotShape\"])\n",
    "\n",
    "le = Le.fit([\"Gtl\", \"Mod\", \"Sev\" ])\n",
    "df_train[\"LandSlope\"] = le.transform(df_train[\"LandSlope\"])\n",
    "df_test[\"LandSlope\"] = le.transform(df_test[\"LandSlope\"])\n",
    "\n",
    "le = Le.fit([ \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\" ])\n",
    "df_train[\"ExterQual\"] = le.transform(df_train[\"ExterQual\"])\n",
    "df_test[\"ExterQual\"] = le.transform(df_test[\"ExterQual\"])\n",
    "\n",
    "le = Le.fit([ \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\" ])\n",
    "df_train[\"ExterCond\"] = le.transform(df_train[\"ExterCond\"])\n",
    "df_test[\"ExterCond\"] = le.transform(df_test[\"ExterCond\"])\n",
    "\n",
    "le = Le.fit([ \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NB\" ])\n",
    "df_train[\"BsmtQual\"] = le.transform(df_train[\"BsmtQual\"])\n",
    "df_test[\"BsmtQual\"] = le.transform(df_test[\"BsmtQual\"])\n",
    "\n",
    "le = Le.fit([  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NB\" ])\n",
    "df_train[\"BsmtCond\"] = le.transform(df_train[\"BsmtCond\"])\n",
    "df_test[\"BsmtCond\"] = le.transform(df_test[\"BsmtCond\"])\n",
    "\n",
    "le = Le.fit([\"Gd\", \"Av\", \"Mn\", \"No\", \"NB\"])\n",
    "df_train[\"BsmtExposure\"] = le.transform(df_train[\"BsmtExposure\"])\n",
    "df_test[\"BsmtExposure\"] = le.transform(df_test[\"BsmtExposure\"])\n",
    "\n",
    "le = Le.fit([ \"GLQ\",\"ALQ\",\"BLQ\",\"Rec\",\"LwQ\",\"Unf\",\"NB\"])\n",
    "df_train[\"BsmtFinType1\"] = le.transform(df_train[\"BsmtFinType1\"])\n",
    "df_test[\"BsmtFinType1\"] = le.transform(df_test[\"BsmtFinType1\"])\n",
    "\n",
    "le = Le.fit([ \"GLQ\",\"ALQ\",\"BLQ\",\"Rec\",\"LwQ\",\"Unf\",\"NB\"])\n",
    "df_train[\"BsmtFinType2\"] = le.transform(df_train[\"BsmtFinType2\"])\n",
    "df_test[\"BsmtFinType2\"] = le.transform(df_test[\"BsmtFinType2\"])\n",
    "\n",
    "le = Le.fit([  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\"])\n",
    "df_train[\"HeatingQC\"] = le.transform(df_train[\"HeatingQC\"])\n",
    "df_test[\"HeatingQC\"] = le.transform(df_test[\"HeatingQC\"])\n",
    "\n",
    "le = Le.fit([  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\"])\n",
    "df_train[\"KitchenQual\"] = le.transform(df_train[\"KitchenQual\"])\n",
    "df_test[\"KitchenQual\"] = le.transform(df_test[\"KitchenQual\"])\n",
    "\n",
    "le = Le.fit([  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NF\" ])\n",
    "df_train[\"FireplaceQu\"] = le.transform(df_train[\"FireplaceQu\"])\n",
    "df_test[\"FireplaceQu\"] = le.transform(df_test[\"FireplaceQu\"])\n",
    "\n",
    "le = Le.fit([  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NG\" ])\n",
    "df_train[\"GarageQual\"] = le.transform(df_train[\"GarageQual\"])\n",
    "df_test[\"GarageQual\"] = le.transform(df_test[\"GarageQual\"])\n",
    "\n",
    "le = Le.fit([  \"Ex\", \"Gd\", \"TA\", \"Fa\", \"Po\", \"NG\" ])\n",
    "df_train[\"GarageCond\"] = le.transform(df_train[\"GarageCond\"])\n",
    "df_test[\"GarageCond\"] = le.transform(df_test[\"GarageCond\"])\n",
    "\n",
    "le = Le.fit([\"Fin\",\"RFn\",\"Unf\",\"NG\"])\n",
    "df_train[\"GarageFinish\"] = le.transform(df_train[\"GarageFinish\"])\n",
    "df_test[\"GarageFinish\"] = le.transform(df_test[\"GarageFinish\"])\n",
    "\n",
    "le = Le.fit(['AllPub','NoSeWr','NoSeWa','ELO'])\n",
    "df_train[\"Utilities\"] = le.transform(df_train[\"Utilities\"])\n",
    "df_test[\"Utilities\"] = le.transform(df_test[\"Utilities\"])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>LotConfig</th>\n",
       "      <th>LandSlope</th>\n",
       "      <th>...</th>\n",
       "      <th>EnclosedPorch</th>\n",
       "      <th>3SsnPorch</th>\n",
       "      <th>ScreenPorch</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "      <th>SalePrice</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>65.0</td>\n",
       "      <td>8450</td>\n",
       "      <td>Pave</td>\n",
       "      <td>3</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>0</td>\n",
       "      <td>Inside</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>208500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>80.0</td>\n",
       "      <td>9600</td>\n",
       "      <td>Pave</td>\n",
       "      <td>3</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>0</td>\n",
       "      <td>FR2</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>2007</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>181500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>68.0</td>\n",
       "      <td>11250</td>\n",
       "      <td>Pave</td>\n",
       "      <td>0</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>0</td>\n",
       "      <td>Inside</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>223500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>70</td>\n",
       "      <td>RL</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9550</td>\n",
       "      <td>Pave</td>\n",
       "      <td>0</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>0</td>\n",
       "      <td>Corner</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>272</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2006</td>\n",
       "      <td>WD</td>\n",
       "      <td>Abnorml</td>\n",
       "      <td>140000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>84.0</td>\n",
       "      <td>14260</td>\n",
       "      <td>Pave</td>\n",
       "      <td>0</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>0</td>\n",
       "      <td>FR2</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>2008</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "      <td>250000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 76 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   MSSubClass MSZoning  LotFrontage  LotArea Street  LotShape LandContour  \\\n",
       "0          60       RL         65.0     8450   Pave         3         Lvl   \n",
       "1          20       RL         80.0     9600   Pave         3         Lvl   \n",
       "2          60       RL         68.0    11250   Pave         0         Lvl   \n",
       "3          70       RL         60.0     9550   Pave         0         Lvl   \n",
       "4          60       RL         84.0    14260   Pave         0         Lvl   \n",
       "\n",
       "   Utilities LotConfig  LandSlope    ...    EnclosedPorch 3SsnPorch  \\\n",
       "0          0    Inside          0    ...                0         0   \n",
       "1          0       FR2          0    ...                0         0   \n",
       "2          0    Inside          0    ...                0         0   \n",
       "3          0    Corner          0    ...              272         0   \n",
       "4          0       FR2          0    ...                0         0   \n",
       "\n",
       "  ScreenPorch PoolArea MiscVal  MoSold  YrSold  SaleType  SaleCondition  \\\n",
       "0           0        0       0       2    2008        WD         Normal   \n",
       "1           0        0       0       5    2007        WD         Normal   \n",
       "2           0        0       0       9    2008        WD         Normal   \n",
       "3           0        0       0       2    2006        WD        Abnorml   \n",
       "4           0        0       0      12    2008        WD         Normal   \n",
       "\n",
       "  SalePrice  \n",
       "0    208500  \n",
       "1    181500  \n",
       "2    223500  \n",
       "3    140000  \n",
       "4    250000  \n",
       "\n",
       "[5 rows x 76 columns]"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [],
   "source": [
    "#处理无序类别型，利用Pandas实现one hot encode。\n",
    "dff_train = pd.get_dummies(df_train)\n",
    "dff_test = pd.get_dummies(df_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1460, 224)\n",
      "(1459, 209)\n"
     ]
    }
   ],
   "source": [
    "print(dff_train.shape)\n",
    "print(dff_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "SalePrice\n",
      "Condition2_RRAe\n",
      "Condition2_RRAn\n",
      "Condition2_RRNn\n",
      "HouseStyle_2.5Fin\n",
      "RoofMatl_ClyTile\n",
      "RoofMatl_Membran\n",
      "RoofMatl_Metal\n",
      "RoofMatl_Roll\n",
      "Exterior1st_ImStucc\n",
      "Exterior1st_Stone\n",
      "Exterior2nd_Other\n",
      "Heating_Floor\n",
      "Heating_OthW\n",
      "Electrical_Mix\n"
     ]
    }
   ],
   "source": [
    "#由于独热编码后train集的特征数比test集的特征数大不止1，故必须找出并删去这那些train集中有而test集中没有的特征（除了SalePrice）。\n",
    "for i in dff_train.columns:\n",
    "    if i not in dff_test.columns:\n",
    "        print(i)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns that exist only in the training frame (one-hot categories that never occur in\n",
    "# the test data). The list is derived from the two frames instead of being typed out by\n",
    "# hand, so it cannot drift from the data. SalePrice is the target and is deliberately kept.\n",
    "num_del = [col for col in dff_train.columns\n",
    "           if col not in dff_test.columns and col != 'SalePrice']\n",
    "# Because the list is recomputed from the current frames, re-running this cell is safe:\n",
    "# a second run finds nothing left to remove instead of raising KeyError.\n",
    "dff_train = dff_train.drop(num_del, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1460, 210)\n",
      "(1459, 209)\n"
     ]
    }
   ],
   "source": [
    "# Re-check both shapes; the training frame should now have exactly one more column\n",
    "# (SalePrice) than the test frame.\n",
    "train_shape, test_shape = dff_train.shape, dff_test.shape\n",
    "print(train_shape)\n",
    "print(test_shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>LandSlope</th>\n",
       "      <th>OverallQual</th>\n",
       "      <th>OverallCond</th>\n",
       "      <th>YearBuilt</th>\n",
       "      <th>YearRemodAdd</th>\n",
       "      <th>...</th>\n",
       "      <th>SaleType_ConLw</th>\n",
       "      <th>SaleType_New</th>\n",
       "      <th>SaleType_Oth</th>\n",
       "      <th>SaleType_WD</th>\n",
       "      <th>SaleCondition_Abnorml</th>\n",
       "      <th>SaleCondition_AdjLand</th>\n",
       "      <th>SaleCondition_Alloca</th>\n",
       "      <th>SaleCondition_Family</th>\n",
       "      <th>SaleCondition_Normal</th>\n",
       "      <th>SaleCondition_Partial</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>60</td>\n",
       "      <td>65.0</td>\n",
       "      <td>8450</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2003</td>\n",
       "      <td>2003</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20</td>\n",
       "      <td>80.0</td>\n",
       "      <td>9600</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>1976</td>\n",
       "      <td>1976</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>60</td>\n",
       "      <td>68.0</td>\n",
       "      <td>11250</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>2001</td>\n",
       "      <td>2002</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>70</td>\n",
       "      <td>60.0</td>\n",
       "      <td>9550</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>5</td>\n",
       "      <td>1915</td>\n",
       "      <td>1970</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>60</td>\n",
       "      <td>84.0</td>\n",
       "      <td>14260</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>5</td>\n",
       "      <td>2000</td>\n",
       "      <td>2000</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 210 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   MSSubClass  LotFrontage  LotArea  LotShape  Utilities  LandSlope  \\\n",
       "0          60         65.0     8450         3          0          0   \n",
       "1          20         80.0     9600         3          0          0   \n",
       "2          60         68.0    11250         0          0          0   \n",
       "3          70         60.0     9550         0          0          0   \n",
       "4          60         84.0    14260         0          0          0   \n",
       "\n",
       "   OverallQual  OverallCond  YearBuilt  YearRemodAdd          ...            \\\n",
       "0            7            5       2003          2003          ...             \n",
       "1            6            8       1976          1976          ...             \n",
       "2            7            5       2001          2002          ...             \n",
       "3            7            5       1915          1970          ...             \n",
       "4            8            5       2000          2000          ...             \n",
       "\n",
       "   SaleType_ConLw  SaleType_New  SaleType_Oth  SaleType_WD  \\\n",
       "0               0             0             0            1   \n",
       "1               0             0             0            1   \n",
       "2               0             0             0            1   \n",
       "3               0             0             0            1   \n",
       "4               0             0             0            1   \n",
       "\n",
       "   SaleCondition_Abnorml  SaleCondition_AdjLand  SaleCondition_Alloca  \\\n",
       "0                      0                      0                     0   \n",
       "1                      0                      0                     0   \n",
       "2                      0                      0                     0   \n",
       "3                      1                      0                     0   \n",
       "4                      0                      0                     0   \n",
       "\n",
       "   SaleCondition_Family  SaleCondition_Normal  SaleCondition_Partial  \n",
       "0                     0                     1                      0  \n",
       "1                     0                     1                      0  \n",
       "2                     0                     1                      0  \n",
       "3                     0                     0                      0  \n",
       "4                     0                     1                      0  \n",
       "\n",
       "[5 rows x 210 columns]"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five rows of the one-hot encoded training frame.\n",
    "dff_train.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1460 entries, 0 to 1459\n",
      "Columns: 210 entries, MSSubClass to SaleCondition_Partial\n",
      "dtypes: float64(3), int64(50), uint8(157)\n",
      "memory usage: 828.5 KB\n"
     ]
    }
   ],
   "source": [
    "# Summarise the encoded training frame: row index, column count, dtypes and memory use.\n",
    "dff_train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Put the training target on a log scale: SalePrice is replaced by ln(SalePrice).\n",
    "# NOTE: run once per freshly built dff_train; running this cell again takes the log a\n",
    "# second time.\n",
    "dff_train['SalePrice'] = np.log(dff_train['SalePrice'].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MSSubClass               True\n",
      "LotFrontage              True\n",
      "LotArea                  True\n",
      "LotShape                 True\n",
      "Utilities                True\n",
      "LandSlope                True\n",
      "OverallQual              True\n",
      "OverallCond              True\n",
      "YearBuilt                True\n",
      "YearRemodAdd             True\n",
      "MasVnrArea               True\n",
      "ExterQual                True\n",
      "ExterCond                True\n",
      "BsmtQual                 True\n",
      "BsmtCond                 True\n",
      "BsmtExposure             True\n",
      "BsmtFinType1             True\n",
      "BsmtFinSF1               True\n",
      "BsmtFinType2             True\n",
      "BsmtFinSF2               True\n",
      "BsmtUnfSF                True\n",
      "TotalBsmtSF              True\n",
      "HeatingQC                True\n",
      "1stFlrSF                 True\n",
      "2ndFlrSF                 True\n",
      "LowQualFinSF             True\n",
      "GrLivArea                True\n",
      "BsmtFullBath             True\n",
      "BsmtHalfBath             True\n",
      "FullBath                 True\n",
      "HalfBath                 True\n",
      "BedroomAbvGr             True\n",
      "KitchenAbvGr             True\n",
      "KitchenQual              True\n",
      "TotRmsAbvGrd             True\n",
      "Fireplaces               True\n",
      "FireplaceQu              True\n",
      "GarageYrBlt              True\n",
      "GarageFinish             True\n",
      "GarageCars               True\n",
      "GarageArea               True\n",
      "GarageQual               True\n",
      "GarageCond               True\n",
      "WoodDeckSF               True\n",
      "OpenPorchSF              True\n",
      "EnclosedPorch            True\n",
      "3SsnPorch                True\n",
      "ScreenPorch              True\n",
      "PoolArea                 True\n",
      "MiscVal                  True\n",
      "MoSold                   True\n",
      "YrSold                   True\n",
      "SalePrice                True\n",
      "MSZoning_C (all)         True\n",
      "MSZoning_FV              True\n",
      "MSZoning_RH              True\n",
      "MSZoning_RL              True\n",
      "MSZoning_RM              True\n",
      "Street_Grvl              True\n",
      "Street_Pave              True\n",
      "LandContour_Bnk          True\n",
      "LandContour_HLS          True\n",
      "LandContour_Low          True\n",
      "LandContour_Lvl          True\n",
      "LotConfig_Corner         True\n",
      "LotConfig_CulDSac        True\n",
      "LotConfig_FR2            True\n",
      "LotConfig_FR3            True\n",
      "LotConfig_Inside         True\n",
      "Neighborhood_Blmngtn     True\n",
      "Neighborhood_Blueste     True\n",
      "Neighborhood_BrDale      True\n",
      "Neighborhood_BrkSide     True\n",
      "Neighborhood_ClearCr     True\n",
      "Neighborhood_CollgCr     True\n",
      "Neighborhood_Crawfor     True\n",
      "Neighborhood_Edwards     True\n",
      "Neighborhood_Gilbert     True\n",
      "Neighborhood_IDOTRR      True\n",
      "Neighborhood_MeadowV     True\n",
      "Neighborhood_Mitchel     True\n",
      "Neighborhood_NAmes       True\n",
      "Neighborhood_NPkVill     True\n",
      "Neighborhood_NWAmes      True\n",
      "Neighborhood_NoRidge     True\n",
      "Neighborhood_NridgHt     True\n",
      "Neighborhood_OldTown     True\n",
      "Neighborhood_SWISU       True\n",
      "Neighborhood_Sawyer      True\n",
      "Neighborhood_SawyerW     True\n",
      "Neighborhood_Somerst     True\n",
      "Neighborhood_StoneBr     True\n",
      "Neighborhood_Timber      True\n",
      "Neighborhood_Veenker     True\n",
      "Condition1_Artery        True\n",
      "Condition1_Feedr         True\n",
      "Condition1_Norm          True\n",
      "Condition1_PosA          True\n",
      "Condition1_PosN          True\n",
      "Condition1_RRAe          True\n",
      "Condition1_RRAn          True\n",
      "Condition1_RRNe          True\n",
      "Condition1_RRNn          True\n",
      "Condition2_Artery        True\n",
      "Condition2_Feedr         True\n",
      "Condition2_Norm          True\n",
      "Condition2_PosA          True\n",
      "Condition2_PosN          True\n",
      "BldgType_1Fam            True\n",
      "BldgType_2fmCon          True\n",
      "BldgType_Duplex          True\n",
      "BldgType_Twnhs           True\n",
      "BldgType_TwnhsE          True\n",
      "HouseStyle_1.5Fin        True\n",
      "HouseStyle_1.5Unf        True\n",
      "HouseStyle_1Story        True\n",
      "HouseStyle_2.5Unf        True\n",
      "HouseStyle_2Story        True\n",
      "HouseStyle_SFoyer        True\n",
      "HouseStyle_SLvl          True\n",
      "RoofStyle_Flat           True\n",
      "RoofStyle_Gable          True\n",
      "RoofStyle_Gambrel        True\n",
      "RoofStyle_Hip            True\n",
      "RoofStyle_Mansard        True\n",
      "RoofStyle_Shed           True\n",
      "RoofMatl_CompShg         True\n",
      "RoofMatl_Tar&Grv         True\n",
      "RoofMatl_WdShake         True\n",
      "RoofMatl_WdShngl         True\n",
      "Exterior1st_AsbShng      True\n",
      "Exterior1st_AsphShn      True\n",
      "Exterior1st_BrkComm      True\n",
      "Exterior1st_BrkFace      True\n",
      "Exterior1st_CBlock       True\n",
      "Exterior1st_CemntBd      True\n",
      "Exterior1st_HdBoard      True\n",
      "Exterior1st_MetalSd      True\n",
      "Exterior1st_Plywood      True\n",
      "Exterior1st_Stucco       True\n",
      "Exterior1st_VinylSd      True\n",
      "Exterior1st_Wd Sdng      True\n",
      "Exterior1st_WdShing      True\n",
      "Exterior2nd_AsbShng      True\n",
      "Exterior2nd_AsphShn      True\n",
      "Exterior2nd_Brk Cmn      True\n",
      "Exterior2nd_BrkFace      True\n",
      "Exterior2nd_CBlock       True\n",
      "Exterior2nd_CmentBd      True\n",
      "Exterior2nd_HdBoard      True\n",
      "Exterior2nd_ImStucc      True\n",
      "Exterior2nd_MetalSd      True\n",
      "Exterior2nd_Plywood      True\n",
      "Exterior2nd_Stone        True\n",
      "Exterior2nd_Stucco       True\n",
      "Exterior2nd_VinylSd      True\n",
      "Exterior2nd_Wd Sdng      True\n",
      "Exterior2nd_Wd Shng      True\n",
      "MasVnrType_BrkCmn        True\n",
      "MasVnrType_BrkFace       True\n",
      "MasVnrType_None          True\n",
      "MasVnrType_Stone         True\n",
      "Foundation_BrkTil        True\n",
      "Foundation_CBlock        True\n",
      "Foundation_PConc         True\n",
      "Foundation_Slab          True\n",
      "Foundation_Stone         True\n",
      "Foundation_Wood          True\n",
      "Heating_GasA             True\n",
      "Heating_GasW             True\n",
      "Heating_Grav             True\n",
      "Heating_Wall             True\n",
      "CentralAir_N             True\n",
      "CentralAir_Y             True\n",
      "Electrical_FuseA         True\n",
      "Electrical_FuseF         True\n",
      "Electrical_FuseP         True\n",
      "Electrical_SBrkr         True\n",
      "Functional_Maj1          True\n",
      "Functional_Maj2          True\n",
      "Functional_Min1          True\n",
      "Functional_Min2          True\n",
      "Functional_Mod           True\n",
      "Functional_Sev           True\n",
      "Functional_Typ           True\n",
      "GarageType_2Types        True\n",
      "GarageType_Attchd        True\n",
      "GarageType_Basment       True\n",
      "GarageType_BuiltIn       True\n",
      "GarageType_CarPort       True\n",
      "GarageType_Detchd        True\n",
      "GarageType_NG            True\n",
      "PavedDrive_N             True\n",
      "PavedDrive_P             True\n",
      "PavedDrive_Y             True\n",
      "SaleType_COD             True\n",
      "SaleType_CWD             True\n",
      "SaleType_Con             True\n",
      "SaleType_ConLD           True\n",
      "SaleType_ConLI           True\n",
      "SaleType_ConLw           True\n",
      "SaleType_New             True\n",
      "SaleType_Oth             True\n",
      "SaleType_WD              True\n",
      "SaleCondition_Abnorml    True\n",
      "SaleCondition_AdjLand    True\n",
      "SaleCondition_Alloca     True\n",
      "SaleCondition_Family     True\n",
      "SaleCondition_Normal     True\n",
      "SaleCondition_Partial    True\n",
      "dtype: bool\n",
      "MSSubClass                True\n",
      "LotFrontage               True\n",
      "LotArea                   True\n",
      "LotShape                  True\n",
      "Utilities                 True\n",
      "LandSlope                 True\n",
      "OverallQual               True\n",
      "OverallCond               True\n",
      "YearBuilt                 True\n",
      "YearRemodAdd              True\n",
      "MasVnrArea                True\n",
      "ExterQual                 True\n",
      "ExterCond                 True\n",
      "BsmtQual                  True\n",
      "BsmtCond                  True\n",
      "BsmtExposure              True\n",
      "BsmtFinType1              True\n",
      "BsmtFinSF1                True\n",
      "BsmtFinType2              True\n",
      "BsmtFinSF2                True\n",
      "BsmtUnfSF                 True\n",
      "TotalBsmtSF               True\n",
      "HeatingQC                 True\n",
      "1stFlrSF                  True\n",
      "2ndFlrSF                  True\n",
      "LowQualFinSF              True\n",
      "GrLivArea                 True\n",
      "BsmtFullBath              True\n",
      "BsmtHalfBath              True\n",
      "FullBath                  True\n",
      "HalfBath                  True\n",
      "BedroomAbvGr              True\n",
      "KitchenAbvGr              True\n",
      "KitchenQual               True\n",
      "TotRmsAbvGrd              True\n",
      "Fireplaces                True\n",
      "FireplaceQu               True\n",
      "GarageYrBlt              False\n",
      "GarageFinish              True\n",
      "GarageCars                True\n",
      "GarageArea                True\n",
      "GarageQual                True\n",
      "GarageCond                True\n",
      "WoodDeckSF                True\n",
      "OpenPorchSF               True\n",
      "EnclosedPorch             True\n",
      "3SsnPorch                 True\n",
      "ScreenPorch               True\n",
      "PoolArea                  True\n",
      "MiscVal                   True\n",
      "MoSold                    True\n",
      "YrSold                    True\n",
      "MSZoning_C (all)          True\n",
      "MSZoning_FV               True\n",
      "MSZoning_RH               True\n",
      "MSZoning_RL               True\n",
      "MSZoning_RM               True\n",
      "Street_Grvl               True\n",
      "Street_Pave               True\n",
      "LandContour_Bnk           True\n",
      "LandContour_HLS           True\n",
      "LandContour_Low           True\n",
      "LandContour_Lvl           True\n",
      "LotConfig_Corner          True\n",
      "LotConfig_CulDSac         True\n",
      "LotConfig_FR2             True\n",
      "LotConfig_FR3             True\n",
      "LotConfig_Inside          True\n",
      "Neighborhood_Blmngtn      True\n",
      "Neighborhood_Blueste      True\n",
      "Neighborhood_BrDale       True\n",
      "Neighborhood_BrkSide      True\n",
      "Neighborhood_ClearCr      True\n",
      "Neighborhood_CollgCr      True\n",
      "Neighborhood_Crawfor      True\n",
      "Neighborhood_Edwards      True\n",
      "Neighborhood_Gilbert      True\n",
      "Neighborhood_IDOTRR       True\n",
      "Neighborhood_MeadowV      True\n",
      "Neighborhood_Mitchel      True\n",
      "Neighborhood_NAmes        True\n",
      "Neighborhood_NPkVill      True\n",
      "Neighborhood_NWAmes       True\n",
      "Neighborhood_NoRidge      True\n",
      "Neighborhood_NridgHt      True\n",
      "Neighborhood_OldTown      True\n",
      "Neighborhood_SWISU        True\n",
      "Neighborhood_Sawyer       True\n",
      "Neighborhood_SawyerW      True\n",
      "Neighborhood_Somerst      True\n",
      "Neighborhood_StoneBr      True\n",
      "Neighborhood_Timber       True\n",
      "Neighborhood_Veenker      True\n",
      "Condition1_Artery         True\n",
      "Condition1_Feedr          True\n",
      "Condition1_Norm           True\n",
      "Condition1_PosA           True\n",
      "Condition1_PosN           True\n",
      "Condition1_RRAe           True\n",
      "Condition1_RRAn           True\n",
      "Condition1_RRNe           True\n",
      "Condition1_RRNn           True\n",
      "Condition2_Artery         True\n",
      "Condition2_Feedr          True\n",
      "Condition2_Norm           True\n",
      "Condition2_PosA           True\n",
      "Condition2_PosN           True\n",
      "BldgType_1Fam             True\n",
      "BldgType_2fmCon           True\n",
      "BldgType_Duplex           True\n",
      "BldgType_Twnhs            True\n",
      "BldgType_TwnhsE           True\n",
      "HouseStyle_1.5Fin         True\n",
      "HouseStyle_1.5Unf         True\n",
      "HouseStyle_1Story         True\n",
      "HouseStyle_2.5Unf         True\n",
      "HouseStyle_2Story         True\n",
      "HouseStyle_SFoyer         True\n",
      "HouseStyle_SLvl           True\n",
      "RoofStyle_Flat            True\n",
      "RoofStyle_Gable           True\n",
      "RoofStyle_Gambrel         True\n",
      "RoofStyle_Hip             True\n",
      "RoofStyle_Mansard         True\n",
      "RoofStyle_Shed            True\n",
      "RoofMatl_CompShg          True\n",
      "RoofMatl_Tar&Grv          True\n",
      "RoofMatl_WdShake          True\n",
      "RoofMatl_WdShngl          True\n",
      "Exterior1st_AsbShng       True\n",
      "Exterior1st_AsphShn       True\n",
      "Exterior1st_BrkComm       True\n",
      "Exterior1st_BrkFace       True\n",
      "Exterior1st_CBlock        True\n",
      "Exterior1st_CemntBd       True\n",
      "Exterior1st_HdBoard       True\n",
      "Exterior1st_MetalSd       True\n",
      "Exterior1st_Plywood       True\n",
      "Exterior1st_Stucco        True\n",
      "Exterior1st_VinylSd       True\n",
      "Exterior1st_Wd Sdng       True\n",
      "Exterior1st_WdShing       True\n",
      "Exterior2nd_AsbShng       True\n",
      "Exterior2nd_AsphShn       True\n",
      "Exterior2nd_Brk Cmn       True\n",
      "Exterior2nd_BrkFace       True\n",
      "Exterior2nd_CBlock        True\n",
      "Exterior2nd_CmentBd       True\n",
      "Exterior2nd_HdBoard       True\n",
      "Exterior2nd_ImStucc       True\n",
      "Exterior2nd_MetalSd       True\n",
      "Exterior2nd_Plywood       True\n",
      "Exterior2nd_Stone         True\n",
      "Exterior2nd_Stucco        True\n",
      "Exterior2nd_VinylSd       True\n",
      "Exterior2nd_Wd Sdng       True\n",
      "Exterior2nd_Wd Shng       True\n",
      "MasVnrType_BrkCmn         True\n",
      "MasVnrType_BrkFace        True\n",
      "MasVnrType_None           True\n",
      "MasVnrType_Stone          True\n",
      "Foundation_BrkTil         True\n",
      "Foundation_CBlock         True\n",
      "Foundation_PConc          True\n",
      "Foundation_Slab           True\n",
      "Foundation_Stone          True\n",
      "Foundation_Wood           True\n",
      "Heating_GasA              True\n",
      "Heating_GasW              True\n",
      "Heating_Grav              True\n",
      "Heating_Wall              True\n",
      "CentralAir_N              True\n",
      "CentralAir_Y              True\n",
      "Electrical_FuseA          True\n",
      "Electrical_FuseF          True\n",
      "Electrical_FuseP          True\n",
      "Electrical_SBrkr          True\n",
      "Functional_Maj1           True\n",
      "Functional_Maj2           True\n",
      "Functional_Min1           True\n",
      "Functional_Min2           True\n",
      "Functional_Mod            True\n",
      "Functional_Sev            True\n",
      "Functional_Typ            True\n",
      "GarageType_2Types         True\n",
      "GarageType_Attchd         True\n",
      "GarageType_Basment        True\n",
      "GarageType_BuiltIn        True\n",
      "GarageType_CarPort        True\n",
      "GarageType_Detchd         True\n",
      "GarageType_NG             True\n",
      "PavedDrive_N              True\n",
      "PavedDrive_P              True\n",
      "PavedDrive_Y              True\n",
      "SaleType_COD              True\n",
      "SaleType_CWD              True\n",
      "SaleType_Con              True\n",
      "SaleType_ConLD            True\n",
      "SaleType_ConLI            True\n",
      "SaleType_ConLw            True\n",
      "SaleType_New              True\n",
      "SaleType_Oth              True\n",
      "SaleType_WD               True\n",
      "SaleCondition_Abnorml     True\n",
      "SaleCondition_AdjLand     True\n",
      "SaleCondition_Alloca      True\n",
      "SaleCondition_Family      True\n",
      "SaleCondition_Normal      True\n",
      "SaleCondition_Partial     True\n",
      "dtype: bool\n"
     ]
    }
   ],
   "source": [
    "# Per-column check that every value is finite (no NaN or +/-inf); training frame first,\n",
    "# then the test frame.\n",
    "for frame in (dff_train, dff_test):\n",
    "    all_finite = np.isfinite(frame).all()\n",
    "    print(all_finite)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove the 'GarageYrBlt' column from both the training and the test frame.\n",
    "del dff_train['GarageYrBlt']\n",
    "del dff_test['GarageYrBlt']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove some noise: drop the most expensive houses (raw sale price of 700000 or more).\n",
    "# SalePrice was replaced by its natural log in an earlier cell, so the cutoff has to be on\n",
    "# the log scale as well; comparing log prices against the raw 700000 kept every row.\n",
    "dff_train = dff_train[dff_train['SalePrice'] < np.log(700000)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "SalePrice                1.000000\n",
       "OverallQual              0.817184\n",
       "GrLivArea                0.700927\n",
       "GarageCars               0.680625\n",
       "GarageArea               0.650888\n",
       "TotalBsmtSF              0.612134\n",
       "1stFlrSF                 0.596981\n",
       "FullBath                 0.594771\n",
       "YearBuilt                0.586570\n",
       "YearRemodAdd             0.565608\n",
       "TotRmsAbvGrd             0.534422\n",
       "Foundation_PConc         0.530840\n",
       "Fireplaces               0.489449\n",
       "MasVnrArea               0.426775\n",
       "GarageType_Attchd        0.415787\n",
       "BsmtFinSF1               0.372023\n",
       "Neighborhood_NridgHt     0.351802\n",
       "CentralAir_Y             0.351600\n",
       "Exterior2nd_VinylSd      0.337444\n",
       "Exterior1st_VinylSd      0.336138\n",
       "LotFrontage              0.335292\n",
       "WoodDeckSF               0.334135\n",
       "SaleType_New             0.329190\n",
       "SaleCondition_Partial    0.324471\n",
       "GarageCond               0.323691\n",
       "OpenPorchSF              0.321053\n",
       "2ndFlrSF                 0.319300\n",
       "HalfBath                 0.313982\n",
       "MasVnrType_Stone         0.311369\n",
       "Electrical_SBrkr         0.305993\n",
       "PavedDrive_Y             0.299959\n",
       "MSZoning_RL              0.298862\n",
       "Neighborhood_NoRidge     0.277523\n",
       "GarageQual               0.275692\n",
       "HouseStyle_2Story        0.263046\n",
       "LotArea                  0.257320\n",
       "BsmtFullBath             0.236224\n",
       "MasVnrType_BrkFace       0.231447\n",
       "GarageType_BuiltIn       0.222532\n",
       "BsmtUnfSF                0.221985\n",
       "BedroomAbvGr             0.209044\n",
       "RoofStyle_Hip            0.198235\n",
       "Neighborhood_StoneBr     0.185579\n",
       "Neighborhood_Somerst     0.170694\n",
       "ExterCond                0.148561\n",
       "LotConfig_CulDSac        0.148309\n",
       "Neighborhood_Timber      0.138945\n",
       "Heating_GasA             0.134285\n",
       "Functional_Typ           0.133545\n",
       "BldgType_1Fam            0.132469\n",
       "BsmtCond                 0.122452\n",
       "ScreenPorch              0.121208\n",
       "MSZoning_FV              0.120312\n",
       "RoofMatl_WdShngl         0.118906\n",
       "Condition1_Norm          0.118581\n",
       "Neighborhood_CollgCr     0.118291\n",
       "LandContour_HLS          0.110919\n",
       "Exterior1st_CemntBd      0.094582\n",
       "Exterior2nd_CmentBd      0.092229\n",
       "Neighborhood_Crawfor     0.087003\n",
       "Neighborhood_Gilbert     0.078915\n",
       "Neighborhood_ClearCr     0.075586\n",
       "BsmtFinType2             0.072430\n",
       "Neighborhood_Veenker     0.069850\n",
       "PoolArea                 0.069798\n",
       "Condition1_PosN          0.061520\n",
       "Neighborhood_NWAmes      0.061220\n",
       "Street_Pave              0.057369\n",
       "MoSold                   0.057329\n",
       "3SsnPorch                0.054900\n",
       "RoofMatl_WdShake         0.052967\n",
       "Condition1_PosA          0.052539\n",
       "Condition2_Norm          0.045333\n",
       "Exterior2nd_ImStucc      0.044922\n",
       "Condition2_PosA          0.043765\n",
       "Condition2_PosN          0.043577\n",
       "SaleType_Con             0.042653\n",
       "Neighborhood_Blmngtn     0.039513\n",
       "LandSlope                0.038526\n",
       "LandContour_Low          0.038239\n",
       "Neighborhood_SawyerW     0.034247\n",
       "Exterior1st_BrkFace      0.030223\n",
       "RoofStyle_Shed           0.026671\n",
       "BldgType_TwnhsE          0.025800\n",
       "RoofStyle_Flat           0.023466\n",
       "SaleType_CWD             0.022878\n",
       "Condition1_RRNn          0.022761\n",
       "LotConfig_FR3            0.022529\n",
       "Condition1_RRAn          0.016763\n",
       "Exterior2nd_BrkFace      0.013888\n",
       "Condition1_RRNe          0.012473\n",
       "Exterior1st_Plywood      0.012151\n",
       "RoofMatl_Tar&Grv         0.009607\n",
       "Foundation_Wood          0.008912\n",
       "RoofStyle_Mansard        0.005801\n",
       "LotConfig_FR2            0.004883\n",
       "BsmtFinSF2               0.004832\n",
       "LotConfig_Corner         0.003985\n",
       "SaleType_ConLI           0.003058\n",
       "BsmtHalfBath            -0.005149\n",
       "LandContour_Lvl         -0.010080\n",
       "HouseStyle_SLvl         -0.011837\n",
       "Utilities               -0.012632\n",
       "Foundation_Stone        -0.014622\n",
       "Functional_Sev          -0.016816\n",
       "Neighborhood_Blueste    -0.018320\n",
       "Exterior2nd_Stone       -0.018440\n",
       "MiscVal                 -0.020021\n",
       "GarageType_2Types       -0.020092\n",
       "Exterior2nd_Plywood     -0.024091\n",
       "Exterior2nd_AsphShn     -0.024411\n",
       "SaleCondition_Alloca    -0.024985\n",
       "HouseStyle_2.5Unf       -0.025158\n",
       "Heating_GasW            -0.026123\n",
       "Exterior1st_CBlock      -0.030312\n",
       "Exterior2nd_CBlock      -0.030312\n",
       "GarageType_Basment      -0.030945\n",
       "Neighborhood_NPkVill    -0.031078\n",
       "Exterior1st_AsphShn     -0.033511\n",
       "OverallCond             -0.036868\n",
       "YrSold                  -0.037263\n",
       "SaleType_ConLw          -0.037339\n",
       "LowQualFinSF            -0.037963\n",
       "SaleType_Oth            -0.039631\n",
       "Functional_Maj1         -0.040518\n",
       "Neighborhood_Mitchel    -0.042049\n",
       "Condition2_Artery       -0.042111\n",
       "Functional_Mod          -0.042197\n",
       "MasVnrType_BrkCmn       -0.043583\n",
       "Exterior1st_Stucco      -0.043651\n",
       "Condition1_RRAe         -0.044021\n",
       "SaleCondition_Family    -0.045567\n",
       "RoofStyle_Gambrel       -0.049433\n",
       "SaleType_ConLD          -0.049515\n",
       "Exterior2nd_HdBoard     -0.049996\n",
       "Exterior2nd_Wd Shng     -0.053960\n",
       "FireplaceQu             -0.055688\n",
       "Condition2_Feedr        -0.056855\n",
       "Exterior2nd_Brk Cmn     -0.056973\n",
       "Street_Grvl             -0.057369\n",
       "Exterior2nd_Stucco      -0.057668\n",
       "Functional_Min1         -0.059711\n",
       "Exterior1st_WdShing     -0.060532\n",
       "Neighborhood_SWISU      -0.061354\n",
       "SaleCondition_AdjLand   -0.064961\n",
       "Electrical_FuseP        -0.065595\n",
       "Functional_Min2         -0.069045\n",
       "MSZoning_RH             -0.072285\n",
       "MSSubClass              -0.073959\n",
       "HouseStyle_1Story       -0.075635\n",
       "Exterior1st_HdBoard     -0.076516\n",
       "Heating_Wall            -0.078009\n",
       "Exterior1st_BrkComm     -0.080301\n",
       "SaleType_COD            -0.085772\n",
       "GarageType_CarPort      -0.086003\n",
       "LotConfig_Inside        -0.089076\n",
       "RoofMatl_CompShg        -0.089917\n",
       "PavedDrive_P            -0.095287\n",
       "HouseStyle_SFoyer       -0.095935\n",
       "SaleCondition_Normal    -0.101838\n",
       "Functional_Maj2         -0.103863\n",
       "HouseStyle_1.5Unf       -0.105717\n",
       "BldgType_Twnhs          -0.109125\n",
       "BldgType_2fmCon         -0.110174\n",
       "LandContour_Bnk         -0.113441\n",
       "BldgType_Duplex         -0.117010\n",
       "Neighborhood_Sawyer     -0.123012\n",
       "Condition1_Feedr        -0.124858\n",
       "BsmtFinType1            -0.125342\n",
       "Neighborhood_BrDale     -0.125527\n",
       "Condition1_Artery       -0.130347\n",
       "Exterior2nd_AsbShng     -0.133940\n",
       "Exterior1st_AsbShng     -0.147430\n",
       "KitchenAbvGr            -0.147548\n",
       "EnclosedPorch           -0.149050\n",
       "Heating_Grav            -0.149166\n",
       "Neighborhood_MeadowV    -0.149371\n",
       "Foundation_Slab         -0.158992\n",
       "SaleCondition_Abnorml   -0.160625\n",
       "Electrical_FuseF        -0.166522\n",
       "Neighborhood_NAmes      -0.166757\n",
       "Exterior2nd_MetalSd     -0.167887\n",
       "Exterior1st_MetalSd     -0.173292\n",
       "Neighborhood_BrkSide    -0.175385\n",
       "HouseStyle_1.5Fin       -0.181039\n",
       "Exterior2nd_Wd Sdng     -0.183538\n",
       "Exterior1st_Wd Sdng     -0.188329\n",
       "MSZoning_C (all)        -0.188377\n",
       "RoofStyle_Gable         -0.188744\n",
       "SaleType_WD             -0.211679\n",
       "Neighborhood_Edwards    -0.211693\n",
       "Neighborhood_OldTown    -0.232243\n",
       "Neighborhood_IDOTRR     -0.233067\n",
       "Electrical_FuseA        -0.238951\n",
       "Foundation_BrkTil       -0.251700\n",
       "BsmtExposure            -0.277372\n",
       "LotShape                -0.278091\n",
       "PavedDrive_N            -0.286344\n",
       "GarageType_NG           -0.322999\n",
       "Foundation_CBlock       -0.337815\n",
       "MSZoning_RM             -0.347446\n",
       "CentralAir_N            -0.351600\n",
       "MasVnrType_None         -0.387818\n",
       "GarageType_Detchd       -0.388638\n",
       "GarageFinish            -0.417404\n",
       "HeatingQC               -0.425906\n",
       "BsmtQual                -0.561447\n",
       "KitchenQual             -0.562738\n",
       "ExterQual               -0.610651\n",
       "Name: SalePrice, dtype: float64"
      ]
     },
     "execution_count": 148,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#寻找与SalePrice相关性更大的特征\n",
    "dff_train.corr()['SalePrice'].sort_values(ascending = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['LandSlope',\n",
       " 'BsmtFinType2',\n",
       " 'BsmtFinSF2',\n",
       " '3SsnPorch',\n",
       " 'PoolArea',\n",
       " 'MoSold',\n",
       " 'Street_Pave',\n",
       " 'LandContour_Low',\n",
       " 'LotConfig_Corner',\n",
       " 'LotConfig_FR2',\n",
       " 'LotConfig_FR3',\n",
       " 'Neighborhood_Blmngtn',\n",
       " 'Neighborhood_ClearCr',\n",
       " 'Neighborhood_Crawfor',\n",
       " 'Neighborhood_Gilbert',\n",
       " 'Neighborhood_NWAmes',\n",
       " 'Neighborhood_SawyerW',\n",
       " 'Neighborhood_Veenker',\n",
       " 'Condition1_PosA',\n",
       " 'Condition1_PosN',\n",
       " 'Condition1_RRAn',\n",
       " 'Condition1_RRNe',\n",
       " 'Condition1_RRNn',\n",
       " 'Condition2_Norm',\n",
       " 'Condition2_PosA',\n",
       " 'Condition2_PosN',\n",
       " 'BldgType_TwnhsE',\n",
       " 'RoofStyle_Flat',\n",
       " 'RoofStyle_Mansard',\n",
       " 'RoofStyle_Shed',\n",
       " 'RoofMatl_Tar&Grv',\n",
       " 'RoofMatl_WdShake',\n",
       " 'Exterior1st_BrkFace',\n",
       " 'Exterior1st_CemntBd',\n",
       " 'Exterior1st_Plywood',\n",
       " 'Exterior2nd_BrkFace',\n",
       " 'Exterior2nd_CmentBd',\n",
       " 'Exterior2nd_ImStucc',\n",
       " 'Foundation_Wood',\n",
       " 'SaleType_CWD',\n",
       " 'SaleType_Con',\n",
       " 'SaleType_ConLI']"
      ]
     },
     "execution_count": 149,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#丢弃与SalePrice相关系数小于0.1（正数）的特征\n",
    "num_del1  = [i for i in dff_train.columns if 0 < dff_train.corr()['SalePrice'][i] < 0.1]\n",
    "num_del1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {},
   "outputs": [],
   "source": [
    "for n in num_del1:\n",
    "    dff_train.drop([n], axis = 1, inplace = True)\n",
    "    dff_test.drop([n], axis = 1, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {},
   "outputs": [],
   "source": [
    "#将train集划分为测试集训练集\n",
    "x_train, x_test, y_train, y_test = train_test_split(dff_train.drop(['SalePrice'], axis = 1), dff_train['SalePrice'], test_size = 0.2, random_state = 0);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_validation.py:458: UserWarning: With alpha=0, this algorithm does not converge well. You are advised to use the LinearRegression estimator\n",
      "  estimator.fit(X_train, y_train, **fit_params)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:477: UserWarning: Coordinate descent with no regularization may lead to unexpected results and is discouraged.\n",
      "  positive)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:491: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.\n",
      "  ConvergenceWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_validation.py:458: UserWarning: With alpha=0, this algorithm does not converge well. You are advised to use the LinearRegression estimator\n",
      "  estimator.fit(X_train, y_train, **fit_params)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:477: UserWarning: Coordinate descent with no regularization may lead to unexpected results and is discouraged.\n",
      "  positive)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:491: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.\n",
      "  ConvergenceWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_validation.py:458: UserWarning: With alpha=0, this algorithm does not converge well. You are advised to use the LinearRegression estimator\n",
      "  estimator.fit(X_train, y_train, **fit_params)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:477: UserWarning: Coordinate descent with no regularization may lead to unexpected results and is discouraged.\n",
      "  positive)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:491: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.\n",
      "  ConvergenceWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:491: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.\n",
      "  ConvergenceWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:491: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.\n",
      "  ConvergenceWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_validation.py:458: UserWarning: With alpha=0, this algorithm does not converge well. You are advised to use the LinearRegression estimator\n",
      "  estimator.fit(X_train, y_train, **fit_params)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:477: UserWarning: Coordinate descent with no regularization may lead to unexpected results and is discouraged.\n",
      "  positive)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:491: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.\n",
      "  ConvergenceWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_validation.py:458: UserWarning: With alpha=0, this algorithm does not converge well. You are advised to use the LinearRegression estimator\n",
      "  estimator.fit(X_train, y_train, **fit_params)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:477: UserWarning: Coordinate descent with no regularization may lead to unexpected results and is discouraged.\n",
      "  positive)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:491: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.\n",
      "  ConvergenceWarning)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_validation.py:458: UserWarning: With alpha=0, this algorithm does not converge well. You are advised to use the LinearRegression estimator\n",
      "  estimator.fit(X_train, y_train, **fit_params)\n",
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:477: UserWarning: Coordinate descent with no regularization may lead to unexpected results and is discouraged.\n",
      "  positive)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ridge: -0.017121057060696436\n",
      "best parameters: {'alpha': 1.0}\n",
      "Lasso: -0.017096026211408922\n",
      "best parameters: {'alpha': 0.001}\n",
      "ElasticNet: -0.016691137519141204\n",
      "best parameters: {'alpha': 0.001}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\linear_model\\coordinate_descent.py:491: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations. Fitting data with very small alpha may cause precision problems.\n",
      "  ConvergenceWarning)\n"
     ]
    }
   ],
   "source": [
    "#尝试岭回归、Lasso回归和弹性网络回归，利用带交叉验证的网格搜索函数寻找最优模型\n",
    "alphas = np.array([1,0.1,0.01,0.001,0.0001,0])\n",
    "ridge = Ridge()\n",
    "laso = Lasso()\n",
    "elastic = ElasticNet()\n",
    "grid = GridSearchCV(ridge, dict(alpha=alphas),scoring = 'neg_mean_squared_error',cv = 3 , n_jobs = 1)\n",
    "gr = grid.fit(x_train, y_train)\n",
    "grid1 = GridSearchCV(laso, dict(alpha=alphas),scoring = 'neg_mean_squared_error',cv = 3 , n_jobs = 1)\n",
    "gr1 = grid1.fit(x_train, y_train)\n",
    "grid2 = GridSearchCV(elastic, dict(alpha=alphas),scoring = 'neg_mean_squared_error',cv = 3 , n_jobs = 1)\n",
    "gr2 = grid2.fit(x_train, y_train)\n",
    "print(\"Ridge:\",gr.best_score_)\n",
    "print(\"best parameters:\",gr.best_params_)\n",
    "print(\"Lasso:\",gr1.best_score_)\n",
    "print(\"best parameters:\",gr1.best_params_)\n",
    "print(\"ElasticNet:\",gr2.best_score_)\n",
    "print(\"best parameters:\",gr2.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ridge: 0.774\n",
      "Lasso: 0.752\n",
      "ElasticNet: 0.765\n"
     ]
    }
   ],
   "source": [
    "#查看各个模型在测试集上的得分。\n",
    "be = gr.best_estimator_\n",
    "be.fit(x_train,y_train)\n",
    "print('Ridge: %.3f' % be.score(x_test, y_test))\n",
    "be1 = gr1.best_estimator_\n",
    "be1.fit(x_train,y_train)\n",
    "print('Lasso: %.3f' % be1.score(x_test, y_test))\n",
    "be2 = gr2.best_estimator_\n",
    "be2.fit(x_train,y_train)\n",
    "print('ElasticNet: %.3f' % be2.score(x_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RandomForest: -0.022433495050008082\n",
      "best parameters: {'max_depth': 15, 'max_features': 'auto', 'n_estimators': 30}\n"
     ]
    }
   ],
   "source": [
    "#尝试随机森林模型，利用带交叉验证的网格搜索函数寻找最优参数设置。\n",
    "params_rf = { 'max_depth': [5, 10, 15, 20, None], 'max_features': ['auto', 'log2'], \n",
    "             'n_estimators': [5, 10, 15, 20, 25, 30]}\n",
    "\n",
    "rf = RandomForestRegressor(random_state = 1, bootstrap = True)\n",
    "grids = GridSearchCV(estimator = rf, param_grid = params_rf, \n",
    "                     scoring = 'neg_mean_squared_error', cv = 3 , n_jobs = 1)\n",
    "gs = grids.fit(x_train, y_train)\n",
    "print(\"RandomForest:\",gs.best_score_)\n",
    "print(\"best parameters:\",gs.best_params_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "RandomForest: 0.874\n"
     ]
    }
   ],
   "source": [
    "#查看随机森林模型在测试集上的得分。\n",
    "bef = gs.best_estimator_\n",
    "bef.fit(x_train,y_train)\n",
    "print('RandomForest: %.3f' % bef.score(x_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([127407.84217616, 154794.62581304, 181743.56091323, ...,\n",
       "       161383.96446025, 113963.71123706, 224750.71291809])"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对test集进行预测。\n",
    "model = RandomForestRegressor(random_state = 1, bootstrap = True, max_depth = None, \n",
    "                              max_features = 'auto', n_estimators = 30)\n",
    "model.fit(dff_train.drop(['SalePrice'], axis = 1), dff_train['SalePrice'])\n",
    "pr = model.predict(dff_test)\n",
    "#由于之前进行了对数变换，现在必须进行逆变换。\n",
    "pr = np.expm1(pr)\n",
    "pr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {},
   "outputs": [],
   "source": [
    "#生成提交文件。\n",
    "df_test = pd.read_csv('C:/Users/Lenovo/Documents/test.csv')\n",
    "sample_submission= pd.DataFrame({'Id':np.asarray(df_test.Id), 'SalePrice':pr})\n",
    "outputpath = 'C:/Users/Lenovo/Documents/submission2.csv'\n",
    "sample_submission.to_csv(outputpath,sep=',',index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
